导航

[Delphi]转换Dialogic Vox文件为Wave文件

Posted on 2005-12-04 14:52  xujh  阅读(4577)  评论(15)  编辑  收藏  举报

千辛万苦才查阅资料写出来的,自己高兴一下。转换类支持8/16位,6000/8000采样率,ADPCM,muLaw,aLaw多种格式的Vox转换。

很奇怪的是,华为的ICD平台录下来的音格式上有微小不同,一个平台的语音用我们这个程序可以正常转换,但另一个平台的录音转换完之后却丢失0轴以上的波形,但听起来声音基本一样。估计2个平台选用的录音卡有所不同。可是,用CoolEdit却可以正常显示2个语音的波形,不知里面的算法有什么微小的差异。用CoolEdit将我们不能正常显示波形的声音重新另存为Dialogic Vox格式后进行比较,发现前后两个声音确有不同。我们再对CoolEdit另存的声音进行转换,结果显示波形正常。

最后的结果,终于波形调整好了,原因是需要判断当累计出现了48个二进制的1000或0000的Sample后,要对ADPCM初始化一次,转换后的噪音比源文件略为大了一些,但可以接受。

关于初始化的原文如下,总觉得自己翻译的还不准确

“Initial Conditions
When the ADPCM algorithm is reset, the step size ss(n) is set to the minimum value (16) and the
estimated waveform value X is set to zero (half scale). Playback of 48 samples (24 bytes) of plus
and minus zero (1000₂ and 0000₂) will reset the algorithm. Twenty-four bytes of 08 Hex or 80
Hex will satisfy this requirement. It is necessary to alternate positive and negative zero values
because the encoding formula always adds 1/8 of the quantization size. If all values were positive
or negative, a DC component would be added that would create a false reference level.”

调用示例

// Usage example: lets the user pick a VOX file and converts it to a WAV file
// next to it, using the converter's defaults.
procedure TForm1.btnBrowseClick(Sender: TObject);
var
  vox: TVox;
begin
  if not OpenDialog1.Execute then
    Exit;

  // Defaults: 6 kHz sample rate, 8 bits per sample, ADPCM input.
  vox := TVox.Create(OpenDialog1.FileName);
  try
    if vox.Convert = 0 then
      ShowMessage('Convert Success! The outfile be saved as ' + vox.OutFileName);
  finally
    // Release the converter even if Convert raises.
    vox.Free;
  end;
end;

Class for converting Vox to Wave

unit uVoxToWave;

interface
uses
  SysUtils;
type
  // Input encoding of the VOX file; selects which Convert* routine TVox.Convert runs.
  TVoxFormat = (VF_ADPCM = 1, VF_MULAW = 2, VF_ALAW = 3);
  // Sample rate; the ordinal value is the rate in samples per second and is
  // written as-is into the WAV header.
  TVoxRate = (VR_6K = 6000, VR_8K = 8000);
  // Output sample width; the ordinal value is the number of BYTES per output
  // sample (1 = 8-bit, 2 = 16-bit).
  TVoxBitsPerSample = (VB_8 = 1, VB_16 = 2);
  TWaveHead = packed record
    cHead: array[0..3] of char; {'RIFF'}
    nLength: longint;
    cWaveTag: array[0..7] of char; {'WAVEfmt '}
    nHeaderLength: LongInt; {16}
    wFormatTag: Word; { format type 01 00}
    nChannels: Word; { number of channels (i.e. mono, stereo, etc.) 01}
    nSamplesPerSec: longint; { sample rate 8000}
    nAvgBytesPerSec: longint; { for buffer estimation 8000}
    nBlockAlign: Word; {1}
    wBitsPerSample: word; {8}
  end;
  TDataHead = packed record
    cDataTag: array[0..3] of char; {'data'}
    nDatalen: longint;
  end;
  PWaveHead = ^TWaveHead;
  // Converts a Dialogic VOX file (ADPCM, mu-law or A-law) into a mono PCM
  // .WAV file. Construct with the file names and format options, then call
  // Convert.
  TVox = class
  private
    FInFileName, FOutFileName: string;
    FVoxFormat: TVoxFormat;
    FVoxRate: TVoxRate;
    FVoxBitsPerSample: TVoxBitsPerSample;
    // rate = samples per second; bit_rate = BYTES per output sample (1 or 2).
    // Both are derived from the enum ordinals in the constructor.
    rate, bit_rate: integer;
    sample: byte; // sample read from input file
    buffer: array[0..9999] of byte; // a block of input data

    SS: Word; // current step size for ADPCM
    SSindex: Word; // current index into step size table
    Sn: Smallint; // current 12-bit linear sample value
    out_val: byte; // .WAV output value
    out_int: Smallint; // linear output value
    // Decodes one 4-bit ADPCM code; returns the new linear sample and updates
    // SS / SSindex through the var parameters.
    function decode(sample: byte; Sn: SmallInt; var SS: WORD; var SSindex: WORD): smallint;
    // Each Convert* routine reads the whole input handle block by block and
    // writes decoded PCM samples to the output handle.
    procedure ConvertADPCM(infile, outfile: integer);
    procedure ConvertMULAW(infile, outfile: integer);
    procedure ConvertALAW(infile, outfile: integer);
  published
    // Read-only views of the file names fixed at construction time.
    property InFileName: string read FInFileName;
    property OutFileName: string read FOutFileName;
  public
    // An empty OutFileName means "input file name with the extension changed to .wav".
    constructor Create(InFileName: string; OutFileName: string = ''; VoxFormat: TVoxFormat = VF_ADPCM; VoxRate: TVoxRate = VR_6K; VoxBitsPerSample: TVoxBitsPerSample = VB_8);
    // Runs the conversion. Returns 0 on success, -2 if the input file cannot
    // be opened, -3 if the output file cannot be created.
    function Convert: integer;
  end;
const
  // Origin arguments passed to FileSeek: from start of file / from end of file.
  SEEK_SET = 0;
  SEEK_END = 2;
  // Count of zero-magnitude ADPCM nibbles (binary 0000 or 1000) that triggers
  // a reset of the decoder state in ConvertADPCM.
  ResetValue = 48;
  // Display names for the three input formats; not referenced in the visible
  // part of this unit.
  formats: array[0..2] of string = ('ADPCM', 'Mu-Law', 'A-Law');
  // mu-law expansion table: indexed by the raw input byte, yields the
  // corresponding 16-bit signed linear sample (used by ConvertMULAW).
  mulaw: array[0..255] of smallint = (
    -32124, -31100, -30076, -29052, -28028, -27004, -25980, -24956,
    -23932, -22908, -21884, -20860, -19836, -18812, -17788, -16764,
    -15996, -15484, -14972, -14460, -13948, -13436, -12924, -12412,
    -11900, -11388, -10876, -10364, -9852, -9340, -8828, -8316,
    -7932, -7676, -7420, -7164, -6908, -6652, -6396, -6140,
    -5884, -5628, -5372, -5116, -4860, -4604, -4348, -4092,
    -3900, -3772, -3644, -3516, -3388, -3260, -3132, -3004,
    -2876, -2748, -2620, -2492, -2364, -2236, -2108, -1980,
    -1884, -1820, -1756, -1692, -1628, -1564, -1500, -1436,
    -1372, -1308, -1244, -1180, -1116, -1052, -988, -924,
    -876, -844, -812, -780, -748, -716, -684, -652,
    -620, -588, -556, -524, -492, -460, -428, -396,
    -372, -356, -340, -324, -308, -292, -276, -260,
    -244, -228, -212, -196, -180, -164, -148, -132,
    -120, -112, -104, -96, -88, -80, -72, -64,
    -56, -48, -40, -32, -24, -16, -8, 0,
    32124, 31100, 30076, 29052, 28028, 27004, 25980, 24956,
    23932, 22908, 21884, 20860, 19836, 18812, 17788, 16764,
    15996, 15484, 14972, 14460, 13948, 13436, 12924, 12412,
    11900, 11388, 10876, 10364, 9852, 9340, 8828, 8316,
    7932, 7676, 7420, 7164, 6908, 6652, 6396, 6140,
    5884, 5628, 5372, 5116, 4860, 4604, 4348, 4092,
    3900, 3772, 3644, 3516, 3388, 3260, 3132, 3004,
    2876, 2748, 2620, 2492, 2364, 2236, 2108, 1980,
    1884, 1820, 1756, 1692, 1628, 1564, 1500, 1436,
    1372, 1308, 1244, 1180, 1116, 1052, 988, 924,
    876, 844, 812, 780, 748, 716, 684, 652,
    620, 588, 556, 524, 492, 460, 428, 396,
    372, 356, 340, 324, 308, 292, 276, 260,
    244, 228, 212, 196, 180, 164, 148, 132,
    120, 112, 104, 96, 88, 80, 72, 64,
    56, 48, 40, 32, 24, 16, 8, 0
    );
  // A-law expansion table: indexed by the raw input byte, yields the
  // corresponding 16-bit signed linear sample (used by ConvertALAW).
  alaw: array[0..255] of smallint = (
    -5504, -5248, -6016, -5760, -4480, -4224, -4992, -4736,
    -7552, -7296, -8064, -7808, -6528, -6272, -7040, -6784,
    -2752, -2624, -3008, -2880, -2240, -2112, -2496, -2368,
    -3776, -3648, -4032, -3904, -3264, -3136, -3520, -3392,
    -22016, -20992, -24064, -23040, -17920, -16896, -19968, -18944,
    -30208, -29184, -32256, -31232, -26112, -25088, -28160, -27136,
    -11008, -10496, -12032, -11520, -8960, -8448, -9984, -9472,
    -15104, -14592, -16128, -15616, -13056, -12544, -14080, -13568,
    -344, -328, -376, -360, -280, -264, -312, -296,
    -472, -456, -504, -488, -408, -392, -440, -424,
    -88, -72, -120, -104, -24, -8, -56, -40,
    -216, -200, -248, -232, -152, -136, -184, -168,
    -1376, -1312, -1504, -1440, -1120, -1056, -1248, -1184,
    -1888, -1824, -2016, -1952, -1632, -1568, -1760, -1696,
    -688, -656, -752, -720, -560, -528, -624, -592,
    -944, -912, -1008, -976, -816, -784, -880, -848,
    5504, 5248, 6016, 5760, 4480, 4224, 4992, 4736,
    7552, 7296, 8064, 7808, 6528, 6272, 7040, 6784,
    2752, 2624, 3008, 2880, 2240, 2112, 2496, 2368,
    3776, 3648, 4032, 3904, 3264, 3136, 3520, 3392,
    22016, 20992, 24064, 23040, 17920, 16896, 19968, 18944,
    30208, 29184, 32256, 31232, 26112, 25088, 28160, 27136,
    11008, 10496, 12032, 11520, 8960, 8448, 9984, 9472,
    15104, 14592, 16128, 15616, 13056, 12544, 14080, 13568,
    344, 328, 376, 360, 280, 264, 312, 296,
    472, 456, 504, 488, 408, 392, 440, 424,
    88, 72, 120, 104, 24, 8, 56, 40,
    216, 200, 248, 232, 152, 136, 184, 168,
    1376, 1312, 1504, 1440, 1120, 1056, 1248, 1184,
    1888, 1824, 2016, 1952, 1632, 1568, 1760, 1696,
    688, 656, 752, 720, 560, 528, 624, 592,
    944, 912, 1008, 976, 816, 784, 880, 848
    );

// stepsize adjustments per Dialogic Application Note 1366
// Indexed by the three magnitude bits of an ADPCM nibble (nibble and $7);
// the value is added to the step-size table index after each decoded sample.
  SSadjust: array[0..7] of smallint = (-1, -1, -1, -1, 2, 4, 6, 8);

// Calculated stepsizes per Dialogic Application Note 1366
// Valid indices are 1..49 (decode clamps the index to that range); entry 0
// is a placeholder so the table can be indexed from 1.
  SStable: array[0..49] of smallint = (0, 16, 17, 19, 21, 23, 25, 28, 31,
    34, 37, 41, 45, 50, 55, 60, 66, 73,
    80, 88, 97, 107, 118, 130, 143, 157, 173,
    190, 209, 230, 253, 279, 307, 337, 371, 408,
    449, 494, 544, 598, 658, 724, 796, 876, 963,
    1060, 1166, 1282, 1411, 1552);

implementation

// Stores the conversion options and resolves the file names.
//   InFileName        - path of the VOX file to read (surrounding blanks are trimmed)
//   OutFileName       - path of the WAV file to write; when blank, the input
//                       name with its extension replaced by '.wav' is used
//   VoxFormat         - input encoding
//   VoxRate           - sample rate; its ordinal is the rate in Hz
//   VoxBitsPerSample  - output width; its ordinal is the bytes per sample
constructor TVox.Create(InFileName: string; OutFileName: string = ''; VoxFormat: TVoxFormat = VF_ADPCM; VoxRate: TVoxRate = VR_6K; VoxBitsPerSample: TVoxBitsPerSample = VB_8);
begin
  // Format options first; the numeric shadows are taken from the enum ordinals.
  FVoxFormat := VoxFormat;
  FVoxRate := VoxRate;
  FVoxBitsPerSample := VoxBitsPerSample;
  rate := Ord(FVoxRate);
  bit_rate := Ord(FVoxBitsPerSample);

  // File names, with the output name defaulting to "<input>.wav".
  FInFileName := Trim(InFileName);
  FOutFileName := Trim(OutFileName);
  if Length(FOutFileName) = 0 then
    FOutFileName := ChangeFileExt(FInFileName, '.wav');
end;

// Decodes one 4-bit ADPCM code.
//   sample   - the nibble: bit 3 is the sign, bits 2..0 the magnitude
//   Sn       - previous linear sample value
//   SS       - in: current step size; out: step size for the next sample
//   SSindex  - in: current index into SStable; out: adjusted index (1..49)
// Returns the new linear sample, clamped to the 12-bit range -2048..2047.
//
// Formula:
//   d(n) = ss(n)*B2 + (ss(n)/2)*B1 + (ss(n)/4)*B0 + ss(n)/8
//   if B3 = 1 then d(n) := -d(n)
//   X(n) = X(n-1) + d(n)
function TVox.decode(sample: byte; Sn: SmallInt; var SS: WORD; var SSindex: WORD): smallint;
var
  Delta: SmallInt;
begin
  // The ss/8 term is always present; each magnitude bit adds its share.
  Delta := SS shr 3;
  if (sample and $1) <> 0 then
    Delta := Delta + (SS shr 2);
  if (sample and $2) <> 0 then
    Delta := Delta + (SS shr 1);
  if (sample and $4) <> 0 then
    Delta := Delta + SS;

  // Bit 3 is the sign bit: clear means add, set means subtract.
  if (sample and $8) = 0 then
    Result := Sn + Delta
  else
    Result := Sn - Delta;

  // Clamp to the 12-bit signed range.
  if Result > 2047 then
    Result := 2047
  else if Result < -2048 then
    Result := -2048;

  // Move the step-size index by the amount selected by the magnitude bits
  // and keep it inside the usable part of the table.
  SSindex := SSindex + SSadjust[sample and $7];
  if SSindex < 1 then
    SSindex := 1
  else if SSindex > 49 then
    SSindex := 49;

  // Step size to use for the next sample.
  SS := SStable[SSindex];
end;
//----- Dialogic VOX format notes (6 kHz / 8-bit example) ---------------------------------------------
// Each input byte carries two samples: bits 7-4 are sample N, bits 3-0 are sample N+1.
// Every sample is a 4-bit ADPCM (Adaptive Differential Pulse Code Modulation) code:
// bit 3 is the sign (0 = positive, 1 = negative) and bits 2-0 give the change
// relative to the previous sample.
// A run of 48 nibbles equal to binary 1000 or 0000 resets the ADPCM decoder state.
//---------------------------------------------------------------------------------------------------

// Decodes ADPCM data from infile and writes PCM samples to outfile.
//   infile  - handle of the VOX file, positioned at the first data byte
//   outfile - handle of the WAV file, positioned just after the headers
// Writes two samples per input byte, each bit_rate bytes wide.
procedure TVox.ConvertADPCM(infile, outfile: integer);
var
  index: Integer; // index into the input block
  bytes: Integer; // number of bytes read into the input block
  outindex: Integer; // number of samples produced for the current block
  outbytebuffer: array[0..19999] of byte; // 8-bit output samples
  outintbuffer: array[0..19999] of Smallint; // 16-bit output samples
  zeroRun: Integer; // length of the current run of zero-magnitude nibbles

  // Puts the decoder into its initial state: sample value 0, minimum step
  // size, and the table index that corresponds to that step size.
  procedure ResetDecoder;
  begin
    Sn := 0;
    SS := 16;
    SSindex := 1;
  end;

  // Decodes one nibble, appends the resulting sample to the active output
  // buffer and applies the reset rule.
  procedure DecodeNibble(nibble: Byte);
  begin
    // Track CONSECUTIVE plus/minus-zero codes (0000 / 1000); any other code
    // breaks the run. Counting them cumulatively over the whole file would
    // reset the decoder in the middle of normal audio.
    if (nibble and $7) = 0 then
      Inc(zeroRun)
    else
      zeroRun := 0;

    Sn := decode(nibble, Sn, SS, SSindex);

    if bit_rate = 1 then // 8 bits per sample
    begin
      out_int := Sn shr 4; // rescale 12-bit value to -128..127
      if out_int > 127 then // clip to the 8-bit range
        out_int := 127;
      if out_int < -128 then
        out_int := -128;
      // 8-bit WAV is unsigned; subtracting 128 and truncating to a byte is
      // the same as adding 128 modulo 256.
      out_val := Byte(out_int - 128);
      outbytebuffer[outindex] := out_val;
    end
    else
    begin
      out_int := Sn shl 4; // rescale 12-bit value to 16 bits
      outintbuffer[outindex] := out_int;
    end;
    Inc(outindex);

    if zeroRun = ResetValue then
    begin
      ResetDecoder;
      zeroRun := 0;
    end;
  end;

begin
  ResetDecoder;
  zeroRun := 0;

  bytes := FileRead(infile, buffer, SizeOf(buffer));
  while bytes > 0 do
  begin
    outindex := 0;
    for index := 0 to bytes - 1 do
    begin
      sample := buffer[index];
      DecodeNibble(sample shr 4); // high nibble is the earlier sample
      DecodeNibble(sample and $F); // low nibble is the later sample
    end;

    // outindex is the number of samples actually produced for this block.
    if bit_rate = 1 then
      FileWrite(outfile, outbytebuffer, outindex)
    else
      FileWrite(outfile, outintbuffer, outindex * SizeOf(Smallint));

    bytes := FileRead(infile, buffer, SizeOf(buffer));
  end;
end;

// Expands mu-law data from infile and writes PCM samples to outfile.
//   infile  - handle of the VOX file, positioned at the first data byte
//   outfile - handle of the WAV file, positioned just after the headers
// Each input byte produces exactly one output sample of bit_rate bytes.
procedure TVox.ConvertMULAW(infile, outfile: integer);
var
  index: Integer; // index into the input block
  bytes: Integer; // number of bytes read into the input block
  outindex: Integer; // number of samples produced for the current block
  outbytebuffer: array[0..9999] of byte; // 8-bit output samples
  outintbuffer: array[0..9999] of SmallInt; // 16-bit output samples
begin
  // read a block of mu-law data
  bytes := FileRead(infile, buffer, SizeOf(buffer));
  while (bytes > 0) do
  begin
    outindex := 0;
    for index := 0 to bytes - 1 do
    begin
      out_int := mulaw[buffer[index]]; // convert mu-law to linear

      if (bit_rate = 1) then // if 8 bits per sample...
      begin
        out_int := out_int shr 8; // rescale for 8 bit values
        out_val := out_int + 128; // convert to unsigned .WAV format
        outbytebuffer[outindex] := out_val;
      end
      else
        outintbuffer[outindex] := out_int;
      Inc(outindex);
    end;

    // Write only the samples produced: one per input byte. (Writing
    // bytes*2 / bytes*4 here, as the ADPCM path does, would append stale
    // buffer content and make the file longer than its header states.)
    if (bit_rate = 1) then
      FileWrite(outfile, outbytebuffer, outindex)
    else
      FileWrite(outfile, outintbuffer, outindex * SizeOf(SmallInt));

    bytes := FileRead(infile, buffer, SizeOf(buffer));
  end;
end;

// Expands A-law data from infile and writes PCM samples to outfile.
//   infile  - handle of the VOX file, positioned at the first data byte
//   outfile - handle of the WAV file, positioned just after the headers
// Each input byte produces exactly one output sample of bit_rate bytes.
procedure TVox.ConvertALAW(infile, outfile: integer);
var
  index: Integer; // index into the input block
  bytes: Integer; // number of bytes read into the input block
  outindex: Integer; // number of samples produced for the current block
  outbytebuffer: array[0..9999] of byte; // 8-bit output samples
  outintbuffer: array[0..9999] of Smallint; // 16-bit output samples
begin
  // read a block of a-law data
  bytes := FileRead(infile, buffer, SizeOf(buffer));
  while (bytes > 0) do
  begin
    outindex := 0;
    for index := 0 to bytes - 1 do
    begin
      out_int := alaw[buffer[index]]; // convert a-law to linear

      if (bit_rate = 1) then // if 8 bits per sample...
      begin
        out_int := out_int shr 8; // rescale for 8 bit values
        out_val := out_int + 128; // convert to unsigned .WAV format
        outbytebuffer[outindex] := out_val;
      end
      else
        outintbuffer[outindex] := out_int;
      Inc(outindex);
    end;

    // Write only the samples produced: one per input byte. (Writing
    // bytes*2 / bytes*4 here, as the ADPCM path does, would append stale
    // buffer content and make the file longer than its header states.)
    if (bit_rate = 1) then
      FileWrite(outfile, outbytebuffer, outindex)
    else
      FileWrite(outfile, outintbuffer, outindex * SizeOf(Smallint));

    bytes := FileRead(infile, buffer, SizeOf(buffer));
  end;
end;

// Converts the input VOX file to a WAV file using the options given to the
// constructor.
// Returns:
//    0  on success
//   -2  if the input file cannot be opened
//   -3  if the output file cannot be created
function TVox.Convert: integer;
var
  infile, outfile, filesize: Integer;
  WaveHead: TWaveHead;
  DataHead: TDataHead;
begin
  // Read-only, shared with other readers and writers (same value as $0040).
  infile := FileOpen(FInFileName, fmOpenRead or fmShareDenyNone);
  if (infile <= 0) then // input file could not be opened
  begin
    Result := -2;
    exit;
  end;
  try
    outfile := FileCreate(FOutFileName);
    if (outfile <= 0) then // output file could not be created
    begin
      Result := -3;
      exit; // the outer finally still closes infile
    end;
    try
      filesize := FileSeek(infile, 0, SEEK_END); // size of input file in bytes
      if (FVoxFormat = VF_ADPCM) then
        filesize := filesize * 2; // ADPCM packs two samples per byte
      FileSeek(infile, 0, SEEK_SET); // back to the beginning of the input

      //---------Create Wave File Head -----------------------------------------
      // The tags are assigned as fixed-length literals: they are NOT
      // NUL-terminated in the file, and a C-style string copy would write a
      // terminator one byte past each array.
      WaveHead.cHead := 'RIFF';
      // RIFF chunk size = data bytes + the 36 header bytes that follow it.
      WaveHead.nLength := (filesize * bit_rate) + sizeof(TWaveHead);
      WaveHead.cWaveTag := 'WAVEfmt ';
      WaveHead.nHeaderLength := 16; // size of the 'fmt ' chunk body
      WaveHead.wFormatTag := 1; // 1 = PCM
      WaveHead.nChannels := 1; // mono
      WaveHead.nSamplesPerSec := rate;
      WaveHead.nAvgBytesPerSec := rate * bit_rate;
      WaveHead.nBlockAlign := bit_rate; // bytes per sample
      WaveHead.wBitsPerSample := 8 * bit_rate;
      DataHead.cDataTag := 'data';
      DataHead.nDatalen := filesize * bit_rate; // size of the sample data
      FileWrite(outfile, WaveHead, sizeof(TWaveHead));
      FileWrite(outfile, DataHead, sizeof(TDataHead));

      case FVoxFormat of
        VF_ADPCM: ConvertADPCM(infile, outfile);
        VF_MULAW: ConvertMULAW(infile, outfile);
        VF_ALAW: ConvertALAW(infile, outfile);
      end;

      Result := 0;
    finally
      FileClose(outfile);
    end;
  finally
    FileClose(infile);
  end;
end;

end.