/** Bit-packing data structure representing (part of) a bit-stream. */(speex_bits.h--49)
/* speex_bits_init() fills in chars, buf_size and owner; speex_bits_write() copies the packed chars out. */
typedef struct SpeexBits {
char *chars; /**< "raw" data (buffer obtained from speex_alloc() in speex_bits_init()) */
int nbBits; /**< Total number of bits stored in the stream*/
int charPtr; /**< Position of the byte "cursor" */
int bitPtr; /**< Position of the bit "cursor" within the current char */
int owner; /**< Does the struct "own" the "raw" buffer (member "chars"); speex_bits_init() sets it to 1 */
int overflow;/**< Set to one if we try to read past the valid data */
int buf_size;/**< Allocated size for buffer (MAX_CHARS_PER_FRAME after speex_bits_init()) */
int reserved1; /**< Reserved for future use */
void *reserved2; /**< Reserved for future use */
} SpeexBits;
/**
 * Initialise a SpeexBits structure with a freshly allocated buffer
 * (quoted from bits.c, line 48).
 *
 * On success the buffer holds MAX_CHARS_PER_FRAME chars, the structure is
 * marked as owning it, and speex_bits_reset() is called on it.
 *
 * NOTE: the failure branch below is an addition relative to the quoted
 * upstream code, which returned with every field except "chars" left
 * uninitialised. Here the structure is put into a defined empty state so
 * later code reading buf_size/owner/cursors never sees indeterminate values.
 * Callers can detect the failure by checking bits->chars for NULL.
 *
 * @param bits Structure to initialise (must not be NULL).
 */
EXPORT void speex_bits_init(SpeexBits *bits)
{
   bits->chars = (char*)speex_alloc(MAX_CHARS_PER_FRAME);
   if (!bits->chars)
   {
      /* No buffer: record an empty, non-owned stream.  speex_bits_reset()
         is not used here because there is no buffer for it to work on. */
      bits->buf_size = 0;
      bits->owner = 0;
      bits->nbBits = 0;
      bits->charPtr = 0;
      bits->bitPtr = 0;
      bits->overflow = 0;
      return;
   }
   bits->buf_size = MAX_CHARS_PER_FRAME;
   bits->owner = 1;
   speex_bits_reset(bits);
}
/* Usage: obtain an encoder state for the narrowband mode descriptor speex_nb_mode
   (presumably via the descriptor's enc_init entry, nb_encoder_init - confirm in speex.c). */
enc_state = speex_encoder_init(&speex_nb_mode);
/* Low-level parameters of a narrowband mode; the instance quoted in these notes is nb_mode (modes.c). */
typedef struct SpeexNBMode { (modes.h--117)
int frameSize; /**< Size of frames used for encoding */
int subframeSize; /**< Size of sub-frames used for encoding */
int lpcSize; /**< Order of LPC filter */
int pitchStart; /**< Smallest pitch value allowed */
int pitchEnd; /**< Largest pitch value allowed */
spx_word16_t gamma1; /**< Perceptual filter parameter #1 */
spx_word16_t gamma2; /**< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */
const SpeexSubmode *submodes[NB_SUBMODES]; /**< Sub-mode data for the mode (entries may be NULL) */
int defaultSubmode; /**< Default sub-mode to use when encoding */
int quality_map[11]; /**< Sub-mode corresponding to each quality setting (11 entries, indexed by quality) */
} SpeexNBMode;
/* Default mode for narrowband */ (modes.c --- 320)
/* Positional initialiser: entries follow the field order of SpeexNBMode. */
static const SpeexNBMode nb_mode = {
160, /*frameSize*/
40, /*subframeSize*/
10, /*lpcSize*/
17, /*pitchStart*/
144, /*pitchEnd*/
#ifdef FIXED_POINT
29491, 19661, /* gamma1, gamma2 (about 0.9 and 0.6 scaled by 32768) */
#else
0.9, 0.6, /* gamma1, gamma2 */
#endif
QCONST16(.0002,15), /*lpc_floor*/
/* submodes: index 0 and indices 9-15 are NULL, indices 1-8 point at nb_submode1..nb_submode8 */
{NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
&nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
5, /*defaultSubmode*/
{1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7} /*quality_map: sub-mode for quality 0..10*/
};
/* Default mode for narrowband */ (modes.c --- 340)
/* Public mode descriptor; entries follow the field order of SpeexMode. */
EXPORT const SpeexMode speex_nb_mode = {
&nb_mode, /* mode: low-level SpeexNBMode data */
nb_mode_query, /* query */
"narrowband", /* modeName */
0, /* modeID */
4, /* bitstream_version */
&nb_encoder_init, /* enc_init */
&nb_encoder_destroy, (nb_celp.c)
&nb_encode, /* enc */
&nb_decoder_init, /* dec_init */
&nb_decoder_destroy, /* dec_destroy */
&nb_decode, /* dec */
&nb_encoder_ctl, /* enc_ctl */
&nb_decoder_ctl, /* dec_ctl */
};
/** Struct defining a Speex mode */ (speex.h--248)
typedef struct SpeexMode {
/** Pointer to the low-level mode data (for speex_nb_mode this is &nb_mode) */
const void *mode;
/** Pointer to the mode query function */
mode_query_func query;
/** The name of the mode (you should not rely on this to identify the mode)*/
const char *modeName;
/**ID of the mode*/
int modeID;
/**Version number of the bitstream (incremented every time we break
bitstream compatibility)*/
int bitstream_version;
/** Pointer to encoder initialization function */
encoder_init_func enc_init;
/** Pointer to encoder destruction function */
encoder_destroy_func enc_destroy;
/** Pointer to frame encoding function (speex_encode_int() calls through this) */
encode_func enc;
/** Pointer to decoder initialization function */
decoder_init_func dec_init;
/** Pointer to decoder destruction function */
decoder_destroy_func dec_destroy;
/** Pointer to frame decoding function */
decode_func dec;
/** ioctl-like requests for encoder */
encoder_ctl_func enc_ctl;
/** ioctl-like requests for decoder */
decoder_ctl_func dec_ctl;
} SpeexMode;
/**Structure representing the full state of the narrowband encoder*/(nb_celp.h--49)
typedef struct EncState {
const SpeexMode *mode; /**< Mode corresponding to the state; first member, which speex_encode_int() reads by casting the state pointer to SpeexMode** */
int first; /**< Is this the first frame? */
int frameSize; /**< Size of frames */
int subframeSize; /**< Size of sub-frames */
int nbSubframes; /**< Number of sub-frames */
int windowSize; /**< Analysis (LPC) window length */
int lpcSize; /**< LPC order */
int min_pitch; /**< Minimum pitch value allowed */
int max_pitch; /**< Maximum pitch value allowed */
spx_word32_t cumul_gain; /**< Product of previously used pitch gains (Q10) */
int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */
int ol_pitch; /**< Open-loop pitch */
int ol_voiced; /**< Open-loop voiced/non-voiced decision */
int *pitch; /**< NOTE(review): undocumented in the quoted header; presumably per-sub-frame pitch values - confirm in nb_celp.c */
/* The four fields below exist only when built with VORBIS_PSYCHO. */
#ifdef VORBIS_PSYCHO
VorbisPsy *psy;
float *psy_window;
float *curve;
float *old_curve;
#endif
spx_word16_t gamma1; /**< Perceptual filter: A(z/gamma1) */
spx_word16_t gamma2; /**< Perceptual filter: A(z/gamma2) */
spx_word16_t lpc_floor; /**< Noise floor multiplier for A[0] in LPC analysis*/
char *stack; /**< Pseudo-stack allocation for temporary memory */
spx_word16_t *winBuf; /**< Input buffer (original signal) */
spx_word16_t *excBuf; /**< Excitation buffer */
spx_word16_t *exc; /**< Start of excitation frame */
spx_word16_t *swBuf; /**< Weighted signal buffer */
spx_word16_t *sw; /**< Start of weighted signal frame */
const spx_word16_t *window; /**< Temporary (Hanning) window */
const spx_word16_t *lagWindow; /**< Window applied to auto-correlation */
spx_lsp_t *old_lsp; /**< LSPs for previous frame */
spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */
spx_mem_t *mem_sp; /**< Filter memory for signal synthesis */
spx_mem_t *mem_sw; /**< Filter memory for perceptually-weighted signal */
spx_mem_t *mem_sw_whole; /**< Filter memory for perceptually-weighted signal (whole frame)*/
spx_mem_t *mem_exc; /**< Filter memory for excitation (whole frame) */
spx_mem_t *mem_exc2; /**< Filter memory for excitation (whole frame) */
spx_mem_t mem_hp[2]; /**< High-pass filter memory */
spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */
spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */
/* VBR/VAD/DTX/ABR state is compiled out when DISABLE_VBR is defined. */
#ifndef DISABLE_VBR
VBRState *vbr; /**< State of the VBR data */
float vbr_quality; /**< Quality setting for VBR encoding */
float relative_quality; /**< Relative quality that will be needed by VBR */
spx_int32_t vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */
spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */
int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */
int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */
int dtx_count; /**< Number of consecutive DTX frames */
spx_int32_t abr_enabled; /**< ABR setting (in bps), 0 if off */
float abr_drift; /**< NOTE(review): undocumented; presumably ABR bookkeeping - confirm in nb_celp.c */
float abr_drift2; /**< NOTE(review): undocumented; presumably ABR bookkeeping - confirm in nb_celp.c */
float abr_count; /**< NOTE(review): undocumented; presumably ABR bookkeeping - confirm in nb_celp.c */
#endif /* #ifndef DISABLE_VBR */
int complexity; /**< Complexity setting (0-10 from least complex to most complex) */
spx_int32_t sampling_rate; /**< NOTE(review): undocumented; presumably the sampling rate in Hz - confirm */
int plc_tuning; /**< NOTE(review): undocumented; presumably a packet-loss tuning setting - confirm */
int encode_submode; /**< NOTE(review): undocumented; presumably whether the sub-mode ID is written to the stream - confirm */
const SpeexSubmode * const *submodes; /**< Sub-mode data */
int submodeID; /**< Activated sub-mode */
int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
int isWideband; /**< Is this used as part of the embedded wideband codec */
int highpass_enabled; /**< Is the input filter enabled */
} EncState;
speex_encoder_ctl(enc_state,SPEEX_SET_QUALITY,&quality);(sb_celp.c--1196)
st->submodeSelect = st->submodeID = ((const SpeexSBMode*)(st->mode->mode))->quality_map[quality];
quality_map[quality] 默认值:{1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}(这是窄带 nb_mode 的 quality_map,见 modes.c;上一行的 SpeexSBMode 强制转换摘自宽带的 sb_celp.c)
st->submodeSelect: /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
st->submodeID: /**< Activated sub-mode */
/** Struct defining the encoding/decoding mode for SB-CELP (wideband) */
typedef struct SpeexSBMode {
const SpeexMode *nb_mode; /**< Embedded narrowband mode */
int frameSize; /**< Size of frames used for encoding */
int subframeSize; /**< Size of sub-frames used for encoding */
int lpcSize; /**< Order of LPC filter */
spx_word16_t gamma1; /**< Perceptual filter parameter #1 */
spx_word16_t gamma2; /**< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */
spx_word16_t folding_gain; /**< NOTE(review): undocumented in the quoted header - confirm meaning in sb_celp.c */
const SpeexSubmode *submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */
int defaultSubmode; /**< Default sub-mode to use when encoding */
int low_quality_map[11]; /**< Mode corresponding to each quality setting (NOTE(review): presumably for the embedded narrowband layer - confirm) */
int quality_map[11]; /**< Mode corresponding to each quality setting (read by the SPEEX_SET_QUALITY handling quoted from sb_celp.c) */
#ifndef DISABLE_VBR
const float (*vbr_thresh)[11]; /**< Pointer to rows of 11 floats (NOTE(review): presumably VBR thresholds per quality - confirm) */
#endif
int nb_modes; /**< NOTE(review): undocumented; presumably the number of entries used - confirm */
} SpeexSBMode;
quality与bit-rate对照表:
mode quality bit-rate mflops quality/description
1 0 2,150 6 Vocoder (mostly for comfort noise)
2 2 5,950 9 Very noticeable artifacts/noise, good intelligibility
3 3-4 8,000 10 Artifacts/noise sometimes noticeable
4 5-6 11,000 14 Artifacts usually noticeable only with headphones
5 7-8 15,000 11 Need good headphones to tell the difference
6 9 18,200 17.5 Hard to tell the difference even with good headphones
7 10 24,600 14.5 Completely transparent for voice, good quality music
8 1 3,950 10.5 Very noticeable artifacts/noise, good intelligibility
/* Usage: encode one frame of 16-bit samples from input_frame; the output goes to the SpeexBits "bits". */
speex_encode_int(enc_state, input_frame, &bits);
/**
 * Encode one frame supplied as 16-bit integer samples.
 *
 * The frame length is queried from the encoder with SPEEX_GET_FRAME_SIZE,
 * the samples are widened to float in a local buffer of MAX_IN_SAMPLES
 * entries, and the mode's "enc" callback is invoked on that buffer.
 *
 * @param state Encoder state; its first member is read as a SpeexMode pointer.
 * @param in    Input samples (one frame).
 * @param bits  Bit-stream passed through to the mode's encode callback.
 * @return Whatever the mode's encode callback returns.
 */
EXPORT int speex_encode_int(void *state, spx_int16_t *in, SpeexBits *bits)
{
   spx_int32_t frame_len;
   float samples[MAX_IN_SAMPLES];
   SpeexMode *mode;
   int k = 0;

   speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &frame_len);

   /* Widen each 16-bit sample to float. */
   while (k < frame_len)
   {
      samples[k] = in[k];
      k++;
   }

   /* For the narrowband mode this dispatches to nb_encode (nb_celp.c, around
      line 252; the note's original "cb_clep.c" looks like a typo). */
   mode = *((SpeexMode**)state);
   return mode->enc(state, samples, bits);
}
/* Usage: copy the packed stream into byte_ptr, limited by MAX_NB_BYTES; nbBytes receives the returned size. */
nbBytes = speex_bits_write(&bits, byte_ptr, MAX_NB_BYTES);
/**
 * Copy the contents of a bit-stream into a caller-supplied buffer
 * (quoted from bits.c, around line 188).
 *
 * A terminator is inserted into the stream first; the cursor positions and
 * bit count are saved beforehand and restored afterwards, so the structure's
 * bitPtr, charPtr and nbBits are unchanged on return.
 *
 * @param bits       Bit-stream to read from.
 * @param chars      Destination buffer.
 * @param max_nbytes Capacity of the destination.
 * @return Number of chars copied, multiplied by BYTES_PER_CHAR.
 */
EXPORT int speex_bits_write(SpeexBits *bits, char *chars, int max_nbytes)
{
   int idx;
   int used_chars;
   int out_chars = max_nbytes/BYTES_PER_CHAR;
   int saved_bit = bits->bitPtr;
   int saved_char = bits->charPtr;
   int saved_total = bits->nbBits;

   /* Insert terminator, then put the saved positions back. */
   speex_bits_insert_terminator(bits);
   bits->bitPtr = saved_bit;
   bits->charPtr = saved_char;
   bits->nbBits = saved_total;

   /* Never copy more chars than the stored bits occupy (rounded up). */
   used_chars = (bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR;
   if (out_chars > used_chars)
      out_chars = used_chars;

   for (idx = 0; idx < out_chars; idx++)
      chars[idx] = HTOLS(bits->chars[idx]);

   return out_chars*BYTES_PER_CHAR;
}
/* Bit-packing structure (repeated excerpt from speex_bits.h): speex_bits_init() fills in chars,
   buf_size and owner; speex_bits_write() copies the packed chars out. */
typedef struct SpeexBits {
char *chars; /**< "raw" data (buffer obtained from speex_alloc() in speex_bits_init()) */
int nbBits; /**< Total number of bits stored in the stream*/
int charPtr; /**< Position of the byte "cursor" */
int bitPtr; /**< Position of the bit "cursor" within the current char */
int owner; /**< Does the struct "own" the "raw" buffer (member "chars"); speex_bits_init() sets it to 1 */
int overflow;/**< Set to one if we try to read past the valid data */
int buf_size;/**< Allocated size for buffer (MAX_CHARS_PER_FRAME after speex_bits_init()) */
int reserved1; /**< Reserved for future use */
void *reserved2; /**< Reserved for future use */
} SpeexBits;
/**
 * Initialise a SpeexBits structure with a freshly allocated buffer
 * (quoted from bits.c, line 48).
 *
 * On success the buffer holds MAX_CHARS_PER_FRAME chars, the structure is
 * marked as owning it, and speex_bits_reset() is called on it.
 *
 * NOTE: the failure branch below is an addition relative to the quoted
 * upstream code, which returned with every field except "chars" left
 * uninitialised. Here the structure is put into a defined empty state so
 * later code reading buf_size/owner/cursors never sees indeterminate values.
 * Callers can detect the failure by checking bits->chars for NULL.
 *
 * @param bits Structure to initialise (must not be NULL).
 */
EXPORT void speex_bits_init(SpeexBits *bits)
{
   bits->chars = (char*)speex_alloc(MAX_CHARS_PER_FRAME);
   if (!bits->chars)
   {
      /* No buffer: record an empty, non-owned stream.  speex_bits_reset()
         is not used here because there is no buffer for it to work on. */
      bits->buf_size = 0;
      bits->owner = 0;
      bits->nbBits = 0;
      bits->charPtr = 0;
      bits->bitPtr = 0;
      bits->overflow = 0;
      return;
   }
   bits->buf_size = MAX_CHARS_PER_FRAME;
   bits->owner = 1;
   speex_bits_reset(bits);
}
/* Usage: obtain an encoder state for the narrowband mode descriptor speex_nb_mode
   (presumably via the descriptor's enc_init entry, nb_encoder_init - confirm in speex.c). */
enc_state = speex_encoder_init(&speex_nb_mode);
/* Low-level parameters of a narrowband mode; the instance quoted in these notes is nb_mode (modes.c). */
typedef struct SpeexNBMode { (modes.h--117)
int frameSize; /**< Size of frames used for encoding */
int subframeSize; /**< Size of sub-frames used for encoding */
int lpcSize; /**< Order of LPC filter */
int pitchStart; /**< Smallest pitch value allowed */
int pitchEnd; /**< Largest pitch value allowed */
spx_word16_t gamma1; /**< Perceptual filter parameter #1 */
spx_word16_t gamma2; /**< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */
const SpeexSubmode *submodes[NB_SUBMODES]; /**< Sub-mode data for the mode (entries may be NULL) */
int defaultSubmode; /**< Default sub-mode to use when encoding */
int quality_map[11]; /**< Sub-mode corresponding to each quality setting (11 entries, indexed by quality) */
} SpeexNBMode;
/* Default mode for narrowband */ (modes.c --- 320)
/* Positional initialiser: entries follow the field order of SpeexNBMode. */
static const SpeexNBMode nb_mode = {
160, /*frameSize*/
40, /*subframeSize*/
10, /*lpcSize*/
17, /*pitchStart*/
144, /*pitchEnd*/
#ifdef FIXED_POINT
29491, 19661, /* gamma1, gamma2 (about 0.9 and 0.6 scaled by 32768) */
#else
0.9, 0.6, /* gamma1, gamma2 */
#endif
QCONST16(.0002,15), /*lpc_floor*/
/* submodes: index 0 and indices 9-15 are NULL, indices 1-8 point at nb_submode1..nb_submode8 */
{NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7,
&nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL},
5, /*defaultSubmode*/
{1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7} /*quality_map: sub-mode for quality 0..10*/
};
/* Default mode for narrowband */ (modes.c --- 340)
/* Public mode descriptor; entries follow the field order of SpeexMode. */
EXPORT const SpeexMode speex_nb_mode = {
&nb_mode, /* mode: low-level SpeexNBMode data */
nb_mode_query, /* query */
"narrowband", /* modeName */
0, /* modeID */
4, /* bitstream_version */
&nb_encoder_init, /* enc_init */
&nb_encoder_destroy, (nb_celp.c)
&nb_encode, /* enc */
&nb_decoder_init, /* dec_init */
&nb_decoder_destroy, /* dec_destroy */
&nb_decode, /* dec */
&nb_encoder_ctl, /* enc_ctl */
&nb_decoder_ctl, /* dec_ctl */
};
/** Struct defining a Speex mode */ (speex.h--248)
typedef struct SpeexMode {
/** Pointer to the low-level mode data (for speex_nb_mode this is &nb_mode) */
const void *mode;
/** Pointer to the mode query function */
mode_query_func query;
/** The name of the mode (you should not rely on this to identify the mode)*/
const char *modeName;
/**ID of the mode*/
int modeID;
/**Version number of the bitstream (incremented every time we break
bitstream compatibility)*/
int bitstream_version;
/** Pointer to encoder initialization function */
encoder_init_func enc_init;
/** Pointer to encoder destruction function */
encoder_destroy_func enc_destroy;
/** Pointer to frame encoding function (speex_encode_int() calls through this) */
encode_func enc;
/** Pointer to decoder initialization function */
decoder_init_func dec_init;
/** Pointer to decoder destruction function */
decoder_destroy_func dec_destroy;
/** Pointer to frame decoding function */
decode_func dec;
/** ioctl-like requests for encoder */
encoder_ctl_func enc_ctl;
/** ioctl-like requests for decoder */
decoder_ctl_func dec_ctl;
} SpeexMode;
/**Structure representing the full state of the narrowband encoder*/(nb_celp.h--49)
typedef struct EncState {
const SpeexMode *mode; /**< Mode corresponding to the state; first member, which speex_encode_int() reads by casting the state pointer to SpeexMode** */
int first; /**< Is this the first frame? */
int frameSize; /**< Size of frames */
int subframeSize; /**< Size of sub-frames */
int nbSubframes; /**< Number of sub-frames */
int windowSize; /**< Analysis (LPC) window length */
int lpcSize; /**< LPC order */
int min_pitch; /**< Minimum pitch value allowed */
int max_pitch; /**< Maximum pitch value allowed */
spx_word32_t cumul_gain; /**< Product of previously used pitch gains (Q10) */
int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */
int ol_pitch; /**< Open-loop pitch */
int ol_voiced; /**< Open-loop voiced/non-voiced decision */
int *pitch; /**< NOTE(review): undocumented in the quoted header; presumably per-sub-frame pitch values - confirm in nb_celp.c */
/* The four fields below exist only when built with VORBIS_PSYCHO. */
#ifdef VORBIS_PSYCHO
VorbisPsy *psy;
float *psy_window;
float *curve;
float *old_curve;
#endif
spx_word16_t gamma1; /**< Perceptual filter: A(z/gamma1) */
spx_word16_t gamma2; /**< Perceptual filter: A(z/gamma2) */
spx_word16_t lpc_floor; /**< Noise floor multiplier for A[0] in LPC analysis*/
char *stack; /**< Pseudo-stack allocation for temporary memory */
spx_word16_t *winBuf; /**< Input buffer (original signal) */
spx_word16_t *excBuf; /**< Excitation buffer */
spx_word16_t *exc; /**< Start of excitation frame */
spx_word16_t *swBuf; /**< Weighted signal buffer */
spx_word16_t *sw; /**< Start of weighted signal frame */
const spx_word16_t *window; /**< Temporary (Hanning) window */
const spx_word16_t *lagWindow; /**< Window applied to auto-correlation */
spx_lsp_t *old_lsp; /**< LSPs for previous frame */
spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */
spx_mem_t *mem_sp; /**< Filter memory for signal synthesis */
spx_mem_t *mem_sw; /**< Filter memory for perceptually-weighted signal */
spx_mem_t *mem_sw_whole; /**< Filter memory for perceptually-weighted signal (whole frame)*/
spx_mem_t *mem_exc; /**< Filter memory for excitation (whole frame) */
spx_mem_t *mem_exc2; /**< Filter memory for excitation (whole frame) */
spx_mem_t mem_hp[2]; /**< High-pass filter memory */
spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */
spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */
/* VBR/VAD/DTX/ABR state is compiled out when DISABLE_VBR is defined. */
#ifndef DISABLE_VBR
VBRState *vbr; /**< State of the VBR data */
float vbr_quality; /**< Quality setting for VBR encoding */
float relative_quality; /**< Relative quality that will be needed by VBR */
spx_int32_t vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */
spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */
int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */
int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */
int dtx_count; /**< Number of consecutive DTX frames */
spx_int32_t abr_enabled; /**< ABR setting (in bps), 0 if off */
float abr_drift; /**< NOTE(review): undocumented; presumably ABR bookkeeping - confirm in nb_celp.c */
float abr_drift2; /**< NOTE(review): undocumented; presumably ABR bookkeeping - confirm in nb_celp.c */
float abr_count; /**< NOTE(review): undocumented; presumably ABR bookkeeping - confirm in nb_celp.c */
#endif /* #ifndef DISABLE_VBR */
int complexity; /**< Complexity setting (0-10 from least complex to most complex) */
spx_int32_t sampling_rate; /**< NOTE(review): undocumented; presumably the sampling rate in Hz - confirm */
int plc_tuning; /**< NOTE(review): undocumented; presumably a packet-loss tuning setting - confirm */
int encode_submode; /**< NOTE(review): undocumented; presumably whether the sub-mode ID is written to the stream - confirm */
const SpeexSubmode * const *submodes; /**< Sub-mode data */
int submodeID; /**< Activated sub-mode */
int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
int isWideband; /**< Is this used as part of the embedded wideband codec */
int highpass_enabled; /**< Is the input filter enabled */
} EncState;
speex_encoder_ctl(enc_state,SPEEX_SET_QUALITY,&quality);(sb_celp.c--1196)
st->submodeSelect = st->submodeID = ((const SpeexSBMode*)(st->mode->mode))->quality_map[quality];
quality_map[quality] 默认值:{1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7}(这是窄带 nb_mode 的 quality_map,见 modes.c;上一行的 SpeexSBMode 强制转换摘自宽带的 sb_celp.c)
st->submodeSelect: /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
st->submodeID: /**< Activated sub-mode */
/** Struct defining the encoding/decoding mode for SB-CELP (wideband) */
typedef struct SpeexSBMode {
const SpeexMode *nb_mode; /**< Embedded narrowband mode */
int frameSize; /**< Size of frames used for encoding */
int subframeSize; /**< Size of sub-frames used for encoding */
int lpcSize; /**< Order of LPC filter */
spx_word16_t gamma1; /**< Perceptual filter parameter #1 */
spx_word16_t gamma2; /**< Perceptual filter parameter #2 */
spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */
spx_word16_t folding_gain; /**< NOTE(review): undocumented in the quoted header - confirm meaning in sb_celp.c */
const SpeexSubmode *submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */
int defaultSubmode; /**< Default sub-mode to use when encoding */
int low_quality_map[11]; /**< Mode corresponding to each quality setting (NOTE(review): presumably for the embedded narrowband layer - confirm) */
int quality_map[11]; /**< Mode corresponding to each quality setting (read by the SPEEX_SET_QUALITY handling quoted from sb_celp.c) */
#ifndef DISABLE_VBR
const float (*vbr_thresh)[11]; /**< Pointer to rows of 11 floats (NOTE(review): presumably VBR thresholds per quality - confirm) */
#endif
int nb_modes; /**< NOTE(review): undocumented; presumably the number of entries used - confirm */
} SpeexSBMode;
quality与bit-rate对照表:
mode quality bit-rate mflops quality/description
1 0 2,150 6 Vocoder (mostly for comfort noise)
2 2 5,950 9 Very noticeable artifacts/noise, good intelligibility
3 3-4 8,000 10 Artifacts/noise sometimes noticeable
4 5-6 11,000 14 Artifacts usually noticeable only with headphones
5 7-8 15,000 11 Need good headphones to tell the difference
6 9 18,200 17.5 Hard to tell the difference even with good headphones
7 10 24,600 14.5 Completely transparent for voice, good quality music
8 1 3,950 10.5 Very noticeable artifacts/noise, good intelligibility
/* Usage: encode one frame of 16-bit samples from input_frame; the output goes to the SpeexBits "bits". */
speex_encode_int(enc_state, input_frame, &bits);
/**
 * Encode one frame supplied as 16-bit integer samples.
 *
 * The frame length is queried from the encoder with SPEEX_GET_FRAME_SIZE,
 * the samples are widened to float in a local buffer of MAX_IN_SAMPLES
 * entries, and the mode's "enc" callback is invoked on that buffer.
 *
 * @param state Encoder state; its first member is read as a SpeexMode pointer.
 * @param in    Input samples (one frame).
 * @param bits  Bit-stream passed through to the mode's encode callback.
 * @return Whatever the mode's encode callback returns.
 */
EXPORT int speex_encode_int(void *state, spx_int16_t *in, SpeexBits *bits)
{
   spx_int32_t frame_len;
   float samples[MAX_IN_SAMPLES];
   SpeexMode *mode;
   int k = 0;

   speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &frame_len);

   /* Widen each 16-bit sample to float. */
   while (k < frame_len)
   {
      samples[k] = in[k];
      k++;
   }

   /* For the narrowband mode this dispatches to nb_encode (nb_celp.c, around
      line 252; the note's original "cb_clep.c" looks like a typo). */
   mode = *((SpeexMode**)state);
   return mode->enc(state, samples, bits);
}
/* Usage: copy the packed stream into byte_ptr, limited by MAX_NB_BYTES; nbBytes receives the returned size. */
nbBytes = speex_bits_write(&bits, byte_ptr, MAX_NB_BYTES);
/**
 * Copy the contents of a bit-stream into a caller-supplied buffer
 * (quoted from bits.c, around line 188).
 *
 * A terminator is inserted into the stream first; the cursor positions and
 * bit count are saved beforehand and restored afterwards, so the structure's
 * bitPtr, charPtr and nbBits are unchanged on return.
 *
 * @param bits       Bit-stream to read from.
 * @param chars      Destination buffer.
 * @param max_nbytes Capacity of the destination.
 * @return Number of chars copied, multiplied by BYTES_PER_CHAR.
 */
EXPORT int speex_bits_write(SpeexBits *bits, char *chars, int max_nbytes)
{
   int idx;
   int used_chars;
   int out_chars = max_nbytes/BYTES_PER_CHAR;
   int saved_bit = bits->bitPtr;
   int saved_char = bits->charPtr;
   int saved_total = bits->nbBits;

   /* Insert terminator, then put the saved positions back. */
   speex_bits_insert_terminator(bits);
   bits->bitPtr = saved_bit;
   bits->charPtr = saved_char;
   bits->nbBits = saved_total;

   /* Never copy more chars than the stored bits occupy (rounded up). */
   used_chars = (bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR;
   if (out_chars > used_chars)
      out_chars = used_chars;

   for (idx = 0; idx < out_chars; idx++)
      chars[idx] = HTOLS(bits->chars[idx]);

   return out_chars*BYTES_PER_CHAR;
}