MFCC特征提取理解

摘自:http://blog.sina.com.cn/s/blog_6a25068b0100m5m6.html

作者:幸福的理由

看了个稀里糊涂,一知半解,觉得以后又会忘记。珍藏下来。边理解边添加。

以下是mfcc_float.cpp的内容

#include "stdlib.h" #include "stdio.h" #include "math.h" #include "MFCC_float.h"

// Numeric constants shared by the MFCC implementation.
#define PI 3.14159265358979   // used by the window, FFT twiddles and DCT matrix
#define PRE_EMP_FACTOR 0.98   // pre-emphasis coefficient stored in Alfa
#define ENE_FLOOR  1.0        // lower bound applied to filterbank outputs before log()

//------------------------------------------------------------------------------------- //public functions

//constructor CMFCC::CMFCC(long aInfo, int aSampleRate, int aN,     int aFl, int aFh, int aM, int aTypeOfFilterBank, int aP) {  //general     Info=aInfo;  SampleRate=aSampleRate; //sample rate  N=aN; M=aM; P=aP; //fft point number, filterbanks number, cepstrum order  Fl=aFl; Fh=aFh;   //the lowest and highest frequency (Hz) of all the filterbanks    //pre-emphasize  Alfa=PRE_EMP_FACTOR;  xtmp=new double[N];

 //FFT  x=new COMPLEX[N];  X=new double[N];  HammingWin=NewHammingWin(N);

 //triangular filterbanks  if(aTypeOfFilterBank==MEL_SCALE)   NewMelFilterBanks(SampleRate, Fl, Fh, N, M, FilterBanks, FilterBanksL, FilterBanksH);  else   NewLinFilterBanks(SampleRate, Fl, Fh, N, M, FilterBanks, FilterBanksL, FilterBanksH);

 S=new double[M];

 //DCT  DctMatrix=NewDctMatrix(P,M);

 //feature temp buffer  pfFea=new double[P]; }

//destructor CMFCC::~CMFCC(void) {  delete xtmp;  delete x;  delete X;  delete HammingWin;  delete FilterBanks[0];  delete FilterBanks;  delete FilterBanksL;  delete FilterBanksH;  delete S;  delete DctMatrix[0];  delete DctMatrix;  delete pfFea; }

//print void CMFCC::Print(void) {  int i,j;  FILE *fp;

 fp=fopen("HammingWin.txt", "wt");  for(i=0 ; i<N ; i++)   fprintf(fp, "%.6f\n", HammingWin[i]);  fclose(fp);

 fp=fopen("FilterBanks.txt", "wt");  for(j=0 ; j<N/2 ; j++)  {   for(i=0 ; i<M ; i++)    fprintf(fp, ".6f ", FilterBanks[i][j]);   fprintf(fp, "\n");  }  fclose(fp);

 fp=fopen("FilterBanksLH.txt", "wt");  for(i=0 ; i<M ; i++)   fprintf(fp, "%d\t%d\t%d\n", FilterBanksL[i], FilterBanksH[i], (FilterBanksH[i]-FilterBanksL[i]+1));  fclose(fp);

 fp=fopen("DCTMatrix.txt", "wt");  for(i=0 ; i<P ; i++)  {   for(j=0 ; j<M ; j++)    fprintf(fp, "%.6f\t", DctMatrix[i][j]);   fprintf(fp, "\n");  }  fclose(fp);

 return; }

//extract one sentence of MFCC/LFCC int CMFCC::MFCCbySentence(short *pnWav, int SampleCount,         float *&pfMFCC, int &FrmCount, int &Dim,         int FrmWidth, int FrmOffst) {  int i;  if(FrmWidth>N || FrmOffst>FrmWidth) return FAILURE;

 //compute frame count & dim of feature vector & allocate memory space  FrmCount=(SampleCount-FrmWidth)/FrmOffst+1;  if(FrmCount<=0)   return FAILURE;  if(Info&DCEPS)  {   if(FrmCount<=10) return FAILURE;  }

 Dim=P;  if(Info&DCEPS) Dim+=P;  if(Info&DDCEPS) Dim+=P;  pfMFCC = new float[FrmCount*Dim];

 //remove DC component  RemoveDC(pnWav, SampleCount);

 //extract one sentence of MFCC  for(i=0 ; i<FrmCount ; i++)     {   MFCCbyFrame(&pnWav[i*FrmOffst], FrmWidth, &pfMFCC[i*Dim], P);     }

 //CMS  if(Info&CMS)  {   CepsMeanSub(pfMFCC, FrmCount, Dim, P);  }

 //extract one sentence of DMFCC  if(Info&DCEPS)  {   DMFCCbyFrame(&pfMFCC[0], &pfMFCC[0], &pfMFCC[0], &pfMFCC[1*Dim], &pfMFCC[2*Dim],       &pfMFCC[P], P);   DMFCCbyFrame(&pfMFCC[0], &pfMFCC[0], &pfMFCC[1*Dim], &pfMFCC[2*Dim], &pfMFCC[3*Dim],       &pfMFCC[1*Dim+P], P);   for(i=2 ; i<=FrmCount-3 ; i++)    DMFCCbyFrame(&pfMFCC[(i-2)*Dim], &pfMFCC[(i-1)*Dim], &pfMFCC[i*Dim], &pfMFCC[(i+1)*Dim], &pfMFCC[(i+2)*Dim],        &pfMFCC[i*Dim+P], P);   DMFCCbyFrame(&pfMFCC[(FrmCount-4)*Dim], &pfMFCC[(FrmCount-3)*Dim],       &pfMFCC[(FrmCount-2)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim],       &pfMFCC[(FrmCount-2)*Dim+P], P);   DMFCCbyFrame(&pfMFCC[(FrmCount-3)*Dim], &pfMFCC[(FrmCount-2)*Dim],       &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim],       &pfMFCC[(FrmCount-1)*Dim+P], P);  }

 //extract one sentence of DDMFCC  if(Info&DDCEPS)  {   for(i=0 ; i<P ; i++)   {    pfMFCC[2*P+i] = pfMFCC[(FrmCount-1)*Dim+2*P+i] = 0;   }

  for(i=1 ; i<=FrmCount-2 ; i++)   {    DDMFCCbyFrame(&pfMFCC[(i-1)*Dim+P], &pfMFCC[i*Dim+P], &pfMFCC[(i+1)*Dim+P],         &pfMFCC[i*Dim+2*P], P);   }  }

 return SUCCESS; }

//compute one frame of MFCC/LFCC int CMFCC::MFCCbyFrame(short *pnWav, int FrmWid, float *pfMFCC, int CepsOrder) {  int i;

 if(FrmWid>N || CepsOrder!=P) return FAILURE;

 //pre-emphasize  预加重  PreEmp(pnWav, FrmWid, xtmp, N, Alfa);

 //apply hamming window   加窗(窗宽度,  ApplyWindow(xtmp, HammingWin, N, xtmp);  //FFT  做fft  for(i=0 ; i<N ; i++)  {   x[i].real=xtmp[i];  //实部,虚部??   x[i].image=0;  }  FFTAmp(x, X, N);

 //apply triangular windows & ln  ApplyFilterBanks(X, N, S, M,       FilterBanks, FilterBanksL, FilterBanksH);

 //DCT  ApplyDCT(S, M, pfFea, P, DctMatrix);

 //double -> float  for(i=0 ; i<CepsOrder ; i++)   pfMFCC[i] = (float)pfFea[i];    return SUCCESS; }

//extract one frame of DMFCC void CMFCC::DMFCCbyFrame(float *Prev2Fea, float *Prev1Fea,        float *CurrFea,        float *Next1Fea, float *Next2Fea,        float *DFea, int P) {  int i;

 for(i=0 ; i<P ; i++)   DFea[i] = (-0.632456f*Prev2Fea[i]-0.316228f*Prev1Fea[i]+ \               0.316228f*Next1Fea[i]+0.632456f*Next2Fea[i]);

 return; }

//extract one frame of DDMFCC void CMFCC::DDMFCCbyFrame(float *PrevDFea, float *CurrDFea, float *NextDFea,         float *DDFea, int P) {  int i;

 for(i=0 ; i<P ; i++)   DDFea[i] = 0.7071f*(NextDFea[i]-PrevDFea[i]);

 return; }

//------------------------------------------------------------------------------------- //private functions

//====== Pre-emphasize ======// void CMFCC::PreEmp(short *pnWav, int FrmWid, double *x, int N, double Alfa) {  int i;  int s=(N-FrmWid)/2; //start position of data in COMPLEX x[N]  s=(512-帧宽)/2

 //clear buffer  for(i=0 ; i<N ; i++)  {   x[i]=0;  }  

 //compute the first sample  x[s] = pnWav[0]-Alfa*pnWav[0];   //y(n)=x(0)-a*x(0);  第一个点

 //compute the following samples in one frame  for(i=1 ; i<FrmWid ; i++)   x[i+s] = pnWav[i]-Alfa*pnWav[i-1];  //每个帧宽中的余下点,x(i+s)=后一个wav-前一个wav*a

 return; }

//====== Hamming window ======// //generate a hamming window double* CMFCC::NewHammingWin(int Len) {  double* Win=new double[Len];  for(int i=0; i<Len; i++)   Win[i]=(0.54-0.46*cos(2*PI*i/Len));  return Win; }

//applying a window void CMFCC::ApplyWindow(double *In, double *Win, int Len, double *Out) {   for(int i=0; i<Len; i++)     Out[i]=In[i]*Win[i];  //对于音频,乘以窗函数 };

 

//====== FFT ======// //Func: FFT 2DIT //In: COMPLEX *input.real; //one frame of wave data //  int n; //Point number of FFT //Out: COMPLEX *input; //real part and image part after FFT 实部和虚部 void FFT(COMPLEX *input, int n) {  int x, i, nv2, j, k, le, l, le1, ip, nm1;  COMPLEX t, u, w;  //复数

    int ntemp=n;  for(x=0; n>1; x++)   n/=2;     n=ntemp;

 nv2=n/2;  nm1=n-1;    j=1;

 for(i=1; i<=nm1; i++)  {   if(i<j)  //i是前一个点,j是后一个点   {    t.real = input[i-1].real;    t.image = input[i-1].image;    input[i-1].real = input[j-1].real;     input[i-1].image = input[j-1].image;    input[j-1].real = t.real;    input[j-1].image = t.image;  //后一个点被前一个点代替   }

  k=nv2;

  while(k<j)   {    j-=k;    k/=2;   }   j+=k;  }

 le=1;  for(l=1; l<=x; l++)  {   le*=2;      le1=le/2;   u.real = 1.0f;   u.image = 0.0f;   w.real = (float)cos(PI/le1);   w.image = (float)-sin(PI/le1);

  for(j=1; j<=le1; j++)   {    for(i=j; i<=n; i+=le)    {     ip = i+le1;     t.real = input[ip-1].real*u.real-input[ip-1].image*u.image;     t.image = input[ip-1].real*u.image+input[ip-1].image*u.real;     input[ip-1].real = input[i-1].real-t.real;     input[ip-1].image = input[i-1].image-t.image;     input[i-1].real = t.real+input[i-1].real;     input[i-1].image = t.image+input[i-1].image;    }

   t.real = u.real*w.real-u.image*w.image;    t.image = u.image*w.real+u.real*w.image;    u.real = t.real;         u.image = t.image;   }  }

 return; }

//Func: FFTAmp, compute the amplitude of spectrum  急速频率谱的幅度 //In: COMPLEX *x.real; //one frame of wave data 一帧的wav //  int N; //point number of FFT   做fft的点数 //Out: double *Amp; //spectral amplitude of this frame   这一帧的谱幅度 void CMFCC::FFTAmp(COMPLEX *x, double *Amp, int N)  //实部和虚部的平方和开方,求模 {  //FFT  FFT(x, N);

 //sqrt  开方  for(int i=0 ; i<=N/2 ; i++)   Amp[i]=sqrt(x[i].real*x[i].real + x[i].image*x[i].image);

 return; }

 

//====== DCT ======// //generate the matrix of DCT, excluding 0'th dimension double** CMFCC::NewDctMatrix(int nP, int nM) {  int p,m;  double **Matrix=new double*[nP];  Matrix[0]=new double[nP*nM];  for(p=1 ; p<nP ; p++) Matrix[p]=&Matrix[0][p*nM];

 for(p=0 ; p<nP ; p++)  {   for(m=0 ; m<nM ; m++)   {    Matrix[p][m]=cos(PI*p*(m+0.5)/nM);  //including c0 //   Matrix[p][m]=cos(PI*(p+1)*(m+0.5)/nM); //excluding c0   }  }

 return Matrix; }

//applying a transformation matrix  逆傅里叶变换 void CMFCC::ApplyDCT(double *In, int InDim, double *Out, int OutDim, double **Matrix) {  int p,m;

 for(p=0 ; p<OutDim ; p++)  {   Out[p]=0;   for(m=0 ; m<InDim ; m++)   {    Out[p]+=In[m]*Matrix[p][m];   }  }

 return; }

 

//====== Generating Filterbanks on Mel-Scale & Linear-Scale ======//
// Converts an FFT point index to a frequency in Hz: (n + 0.5) * Fs / N.
// Returns 0 for an index outside [0, N).
double n2f(int n, int Fs, int N)
{
    const bool inRange = (n >= 0) && (n < N);
    if (!inRange)
    {
        return 0;
    }

    return (n+0.5)*Fs/N;
}

// Converts a frequency in Hz to the nearest FFT point index: f * N / Fs,
// rounded by adding 0.5 and truncating. Returns 0 for f outside [0, Fs].
int f2n(double f, int Fs, int N)
{
    const bool inRange = (f >= 0) && (f <= Fs);
    if (!inRange)
    {
        return 0;
    }

    return (int)(f*N/Fs+0.5);
}

// Warps a frequency in Hz onto the perceptual scale used for the filterbanks:
// b = 1125 * ln(1 + f / 700). The original comments call this unit "bark";
// the accompanying article presents the same formula as the mel frequency.
// Returns 0 for negative f.
double f2b(double f)
{
    if (!(f >= 0))
    {
        // NaN passes this guard in the original (f<0 is false) and must do so here
        if (f < 0) return 0;
    }

    return 1125*log(1+f/700.0);
}

// Inverse of f2b: maps a warped-scale value back to a frequency in Hz,
// f = 700 * (exp(b / 1125) - 1). Returns 0 for negative b.
double b2f(double b)
{
    if (b < 0)
    {
        return 0;
    }

    const double hz = 700*(exp(b/1125)-1);
    return hz;
}

//compute the central point of each filterbank on mel-scale //for M filterbanks, the central point shoule be extended to M+2 int* MelCentralPoint(int Fl, int Fh, int M,       int Fs, int N) {  int m;  double F;  int *CentralPoint=new int[M+2];  for(m=0 ; m<=M+1 ; m++)  {   //central frequency  中心频率,高-低加低,乘以m数/点数   F=b2f(f2b(Fl)+(f2b(Fh)-f2b(Fl))*m/(M+1));   //central point   CentralPoint[m]=f2n(F, Fs, N);  }  return CentralPoint; }

//compute the central point of each filterbank on linear-scale //for M filterbanks, the central frequency shoule be extended to M+2 int* LinCentralPoint(int Fl, int Fh, int M,       int Fs, int N) {  int m;  double F;  int *CentralPoint=new int[M+2];  for(m=0 ; m<=M+1 ; m++)  {   //central frequency   F=Fl+(Fh-Fl)*m*1.0/(M+1);   //central point   CentralPoint[m]=f2n(F, Fs, N);  }  return CentralPoint; }

// Fills one triangular filter into FilterBank:
//   [ns,  nm1) -> 0
//   [nm1, nm2) -> rising edge  (n - nm1) / (nm2 - nm1)
//   [nm2, nm3) -> falling edge (nm3 - n) / (nm3 - nm2)
//   [nm3, ne)  -> 0
// Every range is half-open; the element at index ne is not written.
void GenTriangularFilter(double *FilterBank,
                         int ns, int nm1, int nm2, int nm3, int ne)
{
    int pos;

    // leading zeros
    pos = ns;
    while (pos < nm1)
    {
        FilterBank[pos] = 0;
        ++pos;
    }

    // rising edge
    pos = nm1;
    while (pos < nm2)
    {
        FilterBank[pos] = 1.0*(pos-nm1)/(nm2-nm1);
        ++pos;
    }

    // falling edge
    pos = nm2;
    while (pos < nm3)
    {
        FilterBank[pos] = 1.0*(nm3-pos)/(nm3-nm2);
        ++pos;
    }

    // trailing zeros
    pos = nm3;
    while (pos < ne)
    {
        FilterBank[pos] = 0;
        ++pos;
    }
}

//Generate a set of mel-scale filterbanks 一个mel滤波器 //In: int Fs; //sample rate采样率 //  int Fl; //the lowest frequency of all the filterbanks最低频率 //  int Fh; //the highest frequency of all the filterbanks //  int N; //FFT point  fft点数 //  int M; //number of filterbanks  滤波器的个数 //Out: double **FilterBanks; //Triangular filters   三角滤波器 //  int *FilterBanksL;  //Lower point number of filters  低一级的滤波器 //  int *FilterBanksH;  //Higher point number of filters  高一级的滤波器 void CMFCC::NewMelFilterBanks(int Fs, int Fl, int Fh, int N, int M,          double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH) {  int m;

 //allocate memory space for filters  为滤波器分配空间  FilterBanksL=new int[M];   FilterBanksH=new int[M];  FilterBanks=new double*[M];  FilterBanks[0]=new double[M*N];  for(m=1 ; m<M ; m++) FilterBanks[m]=&FilterBanks[0][m*N];  //每个滤波器做傅里叶变换的点数是n

 //compute the central points of filterbanks, totall M+2 points  计算滤波器的中心点,m个滤波器,是m+2个点  int *MelCenPoints=MelCentralPoint(Fl,Fh,M,Fs,N);  for(m=0 ; m<M ; m++)  {   FilterBanksL[m]=MelCenPoints[m];   FilterBanksH[m]=MelCenPoints[m+2];   GenTriangularFilter(FilterBanks[m], 0, MelCenPoints[m], MelCenPoints[m+1], MelCenPoints[m+2], N-1);  }

 delete MelCenPoints;

 return; }

//Generate a set of linear-scale filterbanks void CMFCC::NewLinFilterBanks(int Fs, int Fl, int Fh, int N, int M,          double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH) {  int m;

 //allocate memory space for filters  FilterBanksL=new int[M];  FilterBanksH=new int[M];  FilterBanks=new double*[M];  FilterBanks[0]=new double[M*N];  for(m=1 ; m<M ; m++) FilterBanks[m]=&FilterBanks[0][m*N];

 //compute the central points of filterbanks, totall M+2 points  int *LinCenPoints=LinCentralPoint(Fl,Fh,M,Fs,N);  for(m=0 ; m<M ; m++)  {   FilterBanksL[m]=LinCenPoints[m];   FilterBanksH[m]=LinCenPoints[m+2];   GenTriangularFilter(FilterBanks[m], 0, LinCenPoints[m], LinCenPoints[m+1], LinCenPoints[m+2], N-1);  }

 delete LinCenPoints;

 return; }

 

//====== Applying Filterbanks & Log Compression ======// 加滤波器,对数化 void CMFCC::ApplyFilterBanks(double *X, int N, double *S, int M,         double **FilterBanks, int *FilterBanksL, int *FilterBanksH) {  int m,n;  int nl, nh;

 for(m=0 ; m<M ; m++)  {   //get the lowest and the highest point number of mth filterbank 得到第m个滤波器的最低和最高点   nl=FilterBanksL[m];  nh=FilterBanksH[m];

  //error   if(nl<0 || nh>=N || nl>=nh)   {    S[m]=0;    continue;   }

  //filter the signal with mth filterbank   S[m]=0;   for(n=nl ; n<=nh ; n++)    S[m]+=X[n]*FilterBanks[m][n];

  //log   if(S[m]<ENE_FLOOR) S[m]=ENE_FLOOR;   S[m]=log(S[m]);  }

 return; }

//====== CMS by Sentence ======//  得到均值 void CMFCC::CepsMeanSub(float *pfMFCC, int FrmCount, int Dim, int CepsOrder) {  int i,j;

 float *Mean=new float[CepsOrder];

 for(i=0 ; i<CepsOrder ; i++)  {   Mean[i]=0;   for(j=0 ; j<FrmCount ; j++)    Mean[i]+=pfMFCC[j*Dim+i];   Mean[i]/=FrmCount;  }

 for(i=0 ; i<CepsOrder ; i++)   for(j=0 ; j<FrmCount ; j++)    pfMFCC[j*Dim+i]-=Mean[i];

 delete Mean;

 return; }

//====== Remove DC component ======// void CMFCC::RemoveDC(short *pnWav, int SampleCount) {  if(pnWav==NULL || SampleCount<=0) return;

 int i;  float sum=0, wav;

 for (i=0 ; i<SampleCount ; i++)  {   sum += pnWav[i];  }  sum /= SampleCount;

 for (i=0 ; i<SampleCount ; i++)  {   wav = pnWav[i]-sum;   pnWav[i] = (short)wav;  }

 return; }

 

 

以下是mfcc_float.h的内容

由此可看出,本程序是mfcc特征和差分mfcc的结合,先提取mfcc,再对提取到的特征向量做差分,进一步剥离特征,目的是为了得到说话人言语和速度的变化啊啥的巴拉巴拉。看了一遍程序,总容易各种分心,太多不知道所以看得很晕。这个盒子,总算打开了。

首先,是预加重,是说为了减少嘴唇和声带的效应影响,加重高频部分,理解来就是消除唇部的摩擦,对真实的语音的频率做补偿。方法是y(n)=x(n)-a*x(n-1).这里a是,alfa系数,在0.9到1之间,一般取0.98。

其次是加窗分帧。加窗,又涉及到语音的特征巴拉巴拉。语音在长范围内是不停变动的,没有固定的特性无法做处理,所以加个窗,窗外的值设定为0,这样就把要处理的部分固定在窗内,做处理。这样的窗叫分析帧,一般取10-30ms作为窗长。为了避免窗边界对信号的遗漏,对帧做偏移的时候要有帧叠(相邻帧之间有重叠),所以一般取帧长的一半作为帧移。加的窗是汉明窗。公式是在加窗范围内,

$w(n)=0.54-0.46\cos\left(\frac{2\pi n}{N-1}\right),\ 0\le n\le N-1$,其中N为窗长(上面程序里的NewHammingWin分母用的是窗长Len)。用汉明窗是因为在时域,是信号乘以窗函数,所以它两端是平滑过渡到0,不像矩形窗变化剧烈,而在频域它能保留高频成分,保留细节信息。

再次就是提取特征了。步骤为先离散傅里叶变换,将信号变换到频域上。这里涉及到dft变换的过程,是实部虚部分别作处理。程序中DFT一段需要细细研究,没看懂。

离散傅里叶变换后得到信号的频谱,然后对它的幅度做平方,就是能量谱。

啥是能量谱?

在这一步步的追溯到太多问题了,查阅资料也各种明白了点,但更多的是一知半解。收集以后一点点明白。http://longer.spaces.eepw.com.cn/articles/article/item/71979 

【对于能量信号,常用能量谱来描述。所谓的能量谱,也称为能量谱密度,是指用密度的概念表示信号能量在各频率点的分布情况。也即是说,对能量谱在频域上积分就可以得到信号的能量。能量谱是信号幅度谱的模的平方,其量纲是焦/赫。对于功率信号,常用功率谱来描述。所谓的功率谱,也称为功率谱密度,是指用密度的概念表示信号功率在各频率点的分布情况。也就是说,对功率谱在频域上积分就可以得到信号的功率。从理论上来说,功率谱是信号自相关函数的傅里叶变换。因为功率信号不满足傅里叶变换的条件,其频谱通常不存在,维纳-辛钦定理证明了自相关函数和傅里叶变换之间对应关系。在工程实际中,即便是功率信号,由于持续的时间有限,可以直接对信号进行傅里叶变换,然后对得到的幅度谱的模求平方,再除以持续时间来估计信号的功率谱。】 又学习了。

啥是能量信号?

【当且仅当f(t)在所有时间上的能量不为0且有限时,该信号为能量信号,即(1)式中的 T 趋于无穷大的时候E为有限。典型的能量信号如方波信号、三角波信号等。但是有些信号不满足能量信号的条件,如周期信号和能量无限的随机信号,此时就需要用功率来描述这类信号。当且仅当x(t)在所有时间上的功率不为0且有限时,该信号为功率信号】一般来说,周期信号和随机信号是功率信号,而非周期的确定信号是能量信号。

因为语音信号是非周期的,且能量有限。所以它是能量信号。

啥是能量?啥是功率?

功率是单位时间做的功,单位时间的能量,能量是连续时间内的功,能量就是对功率在一段时间内做积分。

 

再回去,得到能量谱后,用一组mel尺度的三角形滤波器组对能量谱做滤波。滤波器的个数为24-40个。 每个滤波器有个中心频率。m个数小的时候他们间隔小,m多的时候间隔大。他们有交叠,当前滤波器的中心频率是上一个的最高频率,也是下一个的最低频率。

这样得到每个滤波器的输出,滤波器的传递函数有公式。因为滤波器的中心频率是在mel尺度上等间隔取的,所以要先用f2b把线性频率(Hz)换成mel频率,取好等间隔的点之后,再用b2f换回线性频率。

每个滤波器的中心频率是线性的。用这些滤波器来模拟人的听觉特性,将线性频率转化为mel频率,是B(f)=1125ln(1+f/700)。这样。就可以计算出对应的mel频率滤波器界限。进而得到滤波器函数,乘以能量谱,再取对数就是滤波器的输出。

然后,对输出做dct变换,是离散余弦变换。

啥是离散余弦变换?dct?

看了百度,有点理解。说是形成一个dct矩阵,左上角是重要部分,右下角是非重要部分,甚至可以抛弃,这样对压缩很有用,捡重要部分来压缩。矩阵的求解有公式。

 

自此,mfcc特征提取完成了。

还有很多原理上的,编程上的疑问再解决。

问:傅里叶变换过程,滤波器,数字信号处理。。很多流程。。明天理解。

 

//---------------------------------------------------------------------------------------- #ifndef __MFCCH_FLOAT_H__ #define __MFCCH_FLOAT_H__

// Return codes of the extraction functions.
#define SUCCESS  1
#define FAILURE  0

//----------------------------------------------------------------------------------------
// Values of the aTypeOfFilterBank constructor argument.
#define MEL_SCALE 0
#define LIN_SCALE 1

// Bit flags combined in the "long Info" constructor argument.
#define CMS     0x0001  // tested before cepstral mean subtraction is applied
#define DCEPS   0x0002  // tested before delta cepstra are appended
#define DDCEPS  0x0004  // tested before delta-delta cepstra are appended

// Complex number used as FFT input and output.
struct COMPLEX
{
    double real;   // real part
    double image;  // imaginary part
};

//MFCC extraction class  MFCC提取类 class CMFCC { public:  //采样率16KHZ,频率为从100到7000,24个滤波器,其余参数不解其意???  //constructor, allocate memory & generate pre-set parameters  构造函数,初始参数  CMFCC(long aInfo=7, int aSampleRate=16000, int aN=512,      int aFl=100, int aFh=7000, int aM=24, int aTypeOfFilterBank=0, int aP=12);  //destructor, release memory space  析构函数,释放内存  ~CMFCC(void);  //print  void Print(void);

 //Func: extract one sentence of MFCC  int MFCCbySentence(short *pnWav, int SampleCount,         float *&pfMFCC, int &FrmCount, int &Dim,         int FrmWidth=512, int FrmOffst=256);

 //Func: extract one frame of MFCC  //In: short *pnWav; //one frame of wave data  //  int FrmWid; //frame width  //Out: float *pfMFCC; //one frame of MFCC  //  int CepsOrder; //order of cepstrum  //Ret: flag of SUCCESS or FAILURE  int MFCCbyFrame(short *pnWav, int FrmWid, float *pfMFCC, int CepsOrder);    //Func: extract one frame of DMFCC  //In: float *Prev2Fea; //feature vector of previous second frame  //  float *Prev1Fea; //feature vector of previous first frame  //  float *CurrFea;  //feature vector of current frame  //  float *Next1Fea; //feature vector of next first frame  //  float *Next2Fea; //feature vector of next second frame  //  int P; //order of delta cepstrum  //Out: float *DFea; //delta cepstrum of current frame  void DMFCCbyFrame(float *Prev2Fea, float *Prev1Fea,        float *CurrFea,        float *Next1Fea, float *Next2Fea,        float *DFea, int P);

 //Func: extract one frame of DDMFCC  //In: float *PrevDFea; //delta feature vector of previous frame  //  float *CurrDFea; //delta feature vector of current frame  //  float *NextDFea; //delta feature vector of next frame  //  int P; //order of delta delta cepstrum  //Out: float *DDFea;  //delta delta cepstrum of current frame  void DDMFCCbyFrame(float *PrevDFea, float *CurrDFea, float *NextDFea,         float *DDFea, int P);

private:  //--- general parameters ---//     long Info;  int SampleRate; //sample rate  int N, P; //point number of FFT, order of cepstrum

 //--- pre-emphasize ---//  double Alfa; //factor of pre-emphasize  double *xtmp;//double xtmp[N], temp buffer for x[N]  //pre-emphasize  void PreEmp(short *pnWav, int FrmWid, double *x, int N, double Alfa);

 //--- FFT ---//  double *HammingWin; //double HammingWin[N], weights of hamming window

 //generate the weights of a hamming window  double* NewHammingWin(int Len);  //apply a hamming window  void ApplyWindow(double *In, double *Win, int Len, double *Out);

 COMPLEX *x; //COMPLEX x[N], x.real is the input for FFT  double *X;  //double X[N], spectral amplitude of this frame, only [0,N/2) is efficient

 //compute spectral amplitude  void FFTAmp(COMPLEX *x, double *Amp, int N);

 //--- triangular filterbanks & ln ---//  int M; //number of triangular filters  int Fl, Fh; //lowest and highest frequencies of all the filterbank, in Hz  double *S; //double S[M], the output of filterbanks

 double **FilterBanks; //double FilterBanks[M][N], the weights of each point and each filterbanks  int *FilterBanksL; //int FilterBanksL[M], the lower point of each filterbanks  int *FilterBanksH; //int FilterBanksH[M], the higher point of each filterbanks

 //generate a set of filterbanks on mel-scale mel频率上  void NewMelFilterBanks(int Fs, int Fl, int Fh, int N, int M,          double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH);  //generate a set of filterbanks on linear-scale  线性频率上  void NewLinFilterBanks(int Fs, int Fl, int Fh, int N, int M,          double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH);  //apply triangular windows & ln  void ApplyFilterBanks(double *X, int N, double *S, int M,         double **FilterBanks, int *FilterBanksL, int *FilterBanksH);

 //--- DCT ---//    离散余弦变换  double **DctMatrix; //double DctMatrix[P][M], the matrix of DCT

 //generate the transform matrix of DCT  double** NewDctMatrix(int nP, int nM);  //DCT  dct矩阵  void ApplyDCT(double *In, int InDim, double *Out, int OutDim, double **Matrix);

 //--- CMS ---//  //Cepstrum Mean Subtraction  差分倒谱系数的提取  void CepsMeanSub(float *pfMFCC, int FrmCount, int Dim, int CepsOrder);

 //--- remove DC component ---//   void RemoveDC(short *pnWav, int SampleCount);

 //--- Operations on a whole sentence ---//  double *pfFea; //double pfFea[P]; feature temp buffer };

//---------------------------------------------------------------------------------------- #endif

 

 感言:果然过了很多年, 才慢慢觉得信号处理是件有意思的事情了

posted @ 2014-08-25 13:26  Ivy_LMM  阅读(1198)  评论(0编辑  收藏  举报