(转载)音频编解码基础(wav/aac/pcma/pcmu)

原文:https://blog.csdn.net/hiwubihe/article/details/81258879

[音频编解码系列文章]

音频编解码基础
FFMPEG实现音频重采样
FFMPEG实现PCM编码(采用封装格式实现)
FFMPEG实现PCM编码(不采用封装格式实现)
FAAC库实现PCM编码
FAAD库实现RAW格式AAC解码
FAAD库实现RAW格式AAC封装成ADTS格式
FAAD库实现ADTS格式解码
FFMPEG实现对AAC解码(采用封装格式实现)
FFMPEG实现对AAC解码(不采用封装格式实现)

本文介绍音频处理基础知识,介绍常见的音频问题处理。主要包含以下内容

  • WAV头解析并保存PCM
  • PCM文件加WAV头
  • ADTS格式AAC帧获取
  • PCM转G711A/G711U

WAV格式介绍

WAV是微软RIFF文件格式的一个特例,由一个RIFF文件头和若干个chunk组成:文件头的类型为“WAVE”,其后是“fmt ”和“data”两个必需的子chunk,以及数量不定的可选chunk。格式如下:

一段WAV格式实例

52 49 46 46 24 08 00 00 57 41 56 45 66 6d 74 20 10 00 00 00 01 00 02 00
22 56 00 00 88 58 01 00 04 00 10 00 64 61 74 61 00 08 00 00 00 00 00 00
24 17 1e f3 3c 13 3c 14 16 f9 18 f9 34 e7 23 a6 3c f2 24 f2 11 ce 1a 0d
结构分析如下图

注意事项

       1.二进制数据都是"小端"存储方式。

       2.8位样本以无符号数存储,范围为0~255;16位样本以有符号数存储,范围为-32768~32767。

WAV头解析并保存PCM

WAV文件是一种Windows RIFF文件,只是给PCM数据加了一个文件头,没有做压缩处理;PCM加上头之后,一般的播放器就能直接播放。DEMO代码如下:

/*******************************************************************************
Copyright (c) wubihe Tech. Co., Ltd. All rights reserved.
--------------------------------------------------------------------------------
Date Created:	2014-10-25
Author:			wubihe QQ:1269122125 Email:1269122125@qq.com
Description:	本例子解析WAV头,并打印音频相关信息
--------------------------------------------------------------------------------
Modification History
DATE          AUTHOR          DESCRIPTION
--------------------------------------------------------------------------------
********************************************************************************/
 
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <io.h>
 
/*
 * Fallback fixed-width integer aliases.
 * Each alias is declared only when the build has not defined the matching
 * HAVE_* macro. The aliases map to plain int/short, i.e. they assume a
 * 32-bit int and a 16-bit short (not verified here).
 */
#if !defined(HAVE_INT32_T)
typedef int int32_t;
#endif
#if !defined(HAVE_INT16_T)
typedef short int16_t;
#endif
#if !defined(HAVE_U_INT32_T)
typedef unsigned u_int32_t;
#endif
#if !defined(HAVE_U_INT16_T)
typedef unsigned short u_int16_t;
#endif
 
/*
 * Byte-order helpers for multi-byte fields read straight from a WAV file
 * (WAV stores them little-endian).
 *
 * SWAP16/SWAP32 reverse the byte order of a 16-/32-bit value. They are
 * defined here because UINT16/UINT32 expand to them on big-endian builds;
 * without a definition a WORDS_BIGENDIAN build would not compile.
 * The argument is evaluated more than once, so pass side-effect-free
 * expressions only. Assumes unsigned int is at least 32 bits wide.
 */
#ifndef SWAP16
# define SWAP16(x) ((unsigned short)((((unsigned int)(x) & 0x00ffu) << 8) | \
                                     (((unsigned int)(x) >> 8) & 0x00ffu)))
#endif
#ifndef SWAP32
# define SWAP32(x) ((((unsigned int)(x) & 0x000000ffu) << 24) | \
                    (((unsigned int)(x) & 0x0000ff00u) <<  8) | \
                    (((unsigned int)(x) & 0x00ff0000u) >>  8) | \
                    (((unsigned int)(x) >> 24) & 0x000000ffu))
#endif

/* UINT16/UINT32: convert a little-endian file value to host byte order. */
#ifdef WORDS_BIGENDIAN
# define UINT32(x) SWAP32(x)
# define UINT16(x) SWAP16(x)
#else
# define UINT32(x) (x)
# define UINT16(x) (x)
#endif
 
/*
 * Handle describing an opened PCM source.
 * Allocated and filled in by wav_open_read(); released by wav_close().
 */
typedef struct
{
	FILE *f;          /* stream the audio bytes are read from */
	int channels;     /* fmt nChannels; stays 0 for raw (header-less) input */
	int samplebytes;  /* fmt wBitsPerSample / 8; stays 0 for raw input */
	int samplerate;   /* fmt nSamplesPerSec; stays 0 for raw input */
	int samples;      /* WAV: data chunk length / (samplebytes * channels);
	                     raw: file size in bytes, or 0 when reading stdin */
	int bigendian;    /* set to 1 for raw input, 0 for WAV input */
	int isfloat;      /* non-zero when the format tag or the extensible
	                     SubFormat is WAVE_FORMAT_FLOAT */
} pcmfile_t;
 
 
/*
 * Header found at the very start of a RIFF/WAVE file.
 * wav_open_read() reads it with a single fread() and compares the two
 * four-character codes byte-wise with memcmp().
 */
typedef struct
{
	u_int32_t label;           /* four-character code, expected to be 'RIFF' */
	u_int32_t length;        /* length of the rest of the file */
	u_int32_t chunk_type;      /* four-character code, expected to be 'WAVE' */
}
riff_t;
 
/*
 * Header that precedes each sub-chunk ("fmt ", "data", "BEXT", ...).
 */
typedef struct
{
	u_int32_t label;	/* four-character chunk identifier, compared with memcmp() */
	u_int32_t len;		/* chunk payload length in bytes; passed through UINT32() after reading */
}
riffsub_t;
 
/*
 * On-disk layout of the "fmt " chunk payload.
 *
 * wav_open_read() fread()s the chunk directly into these structs, so they
 * must contain no padding: WAVEFORMATEX has to be exactly 18 bytes and
 * SubFormat has to start at offset 24. MSVC gets this from #pragma pack,
 * GCC-compatible compilers from __attribute__((packed)).
 *
 * The attribute guard must test __GNUC__ (the macro GCC and Clang actually
 * define). Testing "__GNUC" silently disabled packing, which padded
 * WAVEFORMATEX to 20 bytes and shifted every WAVEFORMATEXTENSIBLE field.
 */
#ifdef _MSC_VER
#pragma pack(push, 1)
#endif

/* wFormatTag values recognised by wav_open_read(). */
#define WAVE_FORMAT_PCM		1
#define WAVE_FORMAT_FLOAT	3
#define WAVE_FORMAT_EXTENSIBLE	0xfffe
/* File names used by the demo main(). */
#define INPUT_FILE ("huangdun.wav")
#define OUTPUT_FILE ("huangdun_r48000_FMT_S16_c2.pcm")
struct WAVEFORMATEX
{
	u_int16_t wFormatTag;		/* one of the WAVE_FORMAT_* codes */
	u_int16_t nChannels;		/* number of channels */
	u_int32_t nSamplesPerSec;	/* sample rate */
	u_int32_t nAvgBytesPerSec;
	u_int16_t nBlockAlign;
	u_int16_t wBitsPerSample;	/* bits per sample; divided by 8 by the reader */
	u_int16_t cbSize;		/* size of the extension that follows; the
					   reader requires >= 22 for extensible */
}
#ifdef __GNUC__
__attribute__((packed))
#endif
;

struct WAVEFORMATEXTENSIBLE
{
	struct WAVEFORMATEX Format;
	union {
		u_int16_t wValidBitsPerSample;	/* bits of precision */
		u_int16_t wSamplesPerBlock;	/* valid if wBitsPerSample==0 */
		u_int16_t wReserved;		/* if neither applies, set to zero */
	} Samples;
	u_int32_t dwChannelMask;		/* which channels are present in stream */
	unsigned char SubFormat[16];		/* GUID naming the actual sample format */
}
#ifdef __GNUC__
__attribute__((packed))
#endif
;

#ifdef _MSC_VER
#pragma pack(pop)
#endif
 
/*
 * SubFormat GUID that a WAVE_FORMAT_EXTENSIBLE header must carry for PCM
 * data, as the 16 raw bytes stored in the file. Byte 0 holds the format
 * tag (WAVE_FORMAT_PCM); the float variant checked by wav_open_read()
 * differs only in that byte (WAVE_FORMAT_FLOAT).
 */
static unsigned char waveformat_pcm_guid[16] =
{
	WAVE_FORMAT_PCM,0,0,0,
	0x00, 0x00,
	0x10, 0x00,
	0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71
};
 
/* Print a "file format not supported" diagnostic for `name` on stderr. */
static void unsuperr(const char *name)
{
	FILE *const err_stream = stderr;

	fprintf(err_stream, "%s: file format not supported\n", name);
}
 
 
pcmfile_t *wav_open_read(const char *name, int rawinput)
{
	int i;
	int skip;
	FILE *wave_f;
	riff_t riff;
	riffsub_t riffsub;
	struct WAVEFORMATEXTENSIBLE wave;
	char *riffl = "RIFF";
	char *wavel = "WAVE";
	char *bextl = "BEXT";
	char *fmtl = "fmt ";
	char *datal = "data";
	int fmtsize;
	pcmfile_t *sndf;
	int dostdin = 0;
 
	if (!strcmp(name, "-"))
	{
#ifdef _WIN32
		_setmode(_fileno(stdin), O_BINARY);
#endif
		wave_f = stdin;
		dostdin = 1;
	}
	else if (!(wave_f = fopen(name, "rb")))
	{
		perror(name);
		return NULL;
	}
 
	if (!rawinput) // header input
	{
		if (fread(&riff, 1, sizeof(riff), wave_f) != sizeof(riff))
			return NULL;
		if (memcmp(&(riff.label), riffl, 4))
			return NULL;
		if (memcmp(&(riff.chunk_type), wavel, 4))
			return NULL;
 
		// handle broadcast extensions. added by pro-tools,otherwise it must be fmt chunk.
		if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
			return NULL;
		riffsub.len = UINT32(riffsub.len);
 
		if (!memcmp(&(riffsub.label), bextl, 4))
		{
			fseek(wave_f, riffsub.len, SEEK_CUR);
 
			if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
				return NULL;
			riffsub.len = UINT32(riffsub.len);
		}
 
		if (memcmp(&(riffsub.label), fmtl, 4))
			return NULL;
		memset(&wave, 0, sizeof(wave));
 
		fmtsize = (riffsub.len < sizeof(wave)) ? riffsub.len : sizeof(wave);
		if (fread(&wave, 1, fmtsize, wave_f) != fmtsize)
			return NULL;
 
		for (skip = riffsub.len - fmtsize; skip > 0; skip--)
			fgetc(wave_f);
 
		for (i = 0;; i++)
		{
			if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
				return NULL;
			riffsub.len = UINT32(riffsub.len);
			if (!memcmp(&(riffsub.label), datal, 4))
				break;
			if (i > 10)
				return NULL;
 
			for (skip = riffsub.len; skip > 0; skip--)
				fgetc(wave_f);
		}
		if (UINT16(wave.Format.wFormatTag) != WAVE_FORMAT_PCM && UINT16(wave.Format.wFormatTag) != WAVE_FORMAT_FLOAT)
		{
			if (UINT16(wave.Format.wFormatTag) == WAVE_FORMAT_EXTENSIBLE)
			{
				if (UINT16(wave.Format.cbSize) < 22) // struct too small
					return NULL;
				if (memcmp(wave.SubFormat, waveformat_pcm_guid, 16))
				{
					waveformat_pcm_guid[0] = WAVE_FORMAT_FLOAT;
					if (memcmp(wave.SubFormat, waveformat_pcm_guid, 16))
					{          
						unsuperr(name);
						return NULL;
					}
				}
			}
			else
			{
				unsuperr(name);
				return NULL;
			}
		}
	}
 
	sndf = (pcmfile_t *)malloc(sizeof(*sndf));
	memset(sndf, 0, sizeof(*sndf));
	sndf->f = wave_f;
 
	if (UINT16(wave.Format.wFormatTag) == WAVE_FORMAT_FLOAT) {
		sndf->isfloat = 1;
	} else {
		sndf->isfloat = (wave.SubFormat[0] == WAVE_FORMAT_FLOAT);
	}
	if (rawinput)
	{
		sndf->bigendian = 1;
		if (dostdin)
			sndf->samples = 0;
		else
		{
			fseek(sndf->f, 0 , SEEK_END);
			sndf->samples = ftell(sndf->f);
			rewind(sndf->f);
		}
	}
	else
	{
		sndf->bigendian = 0;
		sndf->channels = UINT16(wave.Format.nChannels);
		sndf->samplebytes = UINT16(wave.Format.wBitsPerSample) / 8;
		sndf->samplerate = UINT32(wave.Format.nSamplesPerSec);
		sndf->samples = riffsub.len / (sndf->samplebytes * sndf->channels);
	}
	return sndf;
}
 
/*
 * Close the stream held by `sndf` and free the handle.
 *
 * Returns the result of fclose() (0 on success, EOF on failure).
 * A NULL handle is tolerated and reported as EOF, so the result of a
 * failed wav_open_read() can be passed in without crashing.
 */
int wav_close(pcmfile_t *sndf)
{
	int rc;

	if (sndf == NULL)
		return EOF;

	rc = fclose(sndf->f);
	free(sndf);
	return rc;
}
 
 
/*
 * Demo entry point: parse the header of INPUT_FILE, print the audio
 * parameters, and copy the audio data that follows the header to
 * OUTPUT_FILE in 16-bit units.
 *
 * Returns 0 on success, -1 if a file cannot be opened or an I/O error
 * occurs.
 */
int main()
{
	FILE *fpout;
	pcmfile_t *pPcmFile;
	unsigned short usSample;
	int iResult = 0;

	fpout = fopen(OUTPUT_FILE, "wb+");
	if (fpout == NULL)
	{
		printf("Create pcm file error\n");
		return -1;
	}

	/* wav_open_read() returns NULL for a missing or unsupported file;
	   it must not be dereferenced in that case. */
	pPcmFile = wav_open_read(INPUT_FILE, 0);
	if (pPcmFile == NULL)
	{
		printf("Open wav file error\n");
		fclose(fpout);
		return -1;
	}

	printf("channels:%1d\t samplebytes:%1d\t samplerate:%06d\t samples:%07d\t bigendian:%1d\n",pPcmFile->channels,pPcmFile->samplebytes,pPcmFile->samplerate,pPcmFile->samples,pPcmFile->bigendian);

	/* Drive the loop with fread()'s return value: testing feof() before
	   reading would write the last sample twice, and feof() never becomes
	   true after a read error. */
	while (fread(&usSample, sizeof(usSample), 1, pPcmFile->f) == 1)
	{
		if (fwrite(&usSample, sizeof(usSample), 1, fpout) != 1)
		{
			printf("Write pcm file error\n");
			iResult = -1;
			break;
		}
	}
	if (iResult == 0 && ferror(pPcmFile->f))
	{
		printf("Read wav file error\n");
		iResult = -1;
	}

	wav_close(pPcmFile);
	fclose(fpout);
	if (iResult != 0)
		return iResult;

	printf("Parser WAV Success!!");
	getchar();

	return 0;
}

  运行结果保存PCM文件

PCM文件加WAV头

ADTS格式AAC帧获取

ADTS结构

ADTSHeader结构

/*
 * ADTS Header: 
 *  MPEG-2 version 56 bits (byte aligned) 
 *  MPEG-4 version 56 bits (byte aligned) - note - changed for 0.99 version
 *
 * syncword						12 bits
 * id							1 bit
 * layer						2 bits
 * protection_absent			1 bit
 * profile						2 bits
 * sampling_frequency_index		4 bits
 * private						1 bit
 * channel_configuration			3 bits
 * original						1 bit
 * home							1 bit
 * copyright_id					1 bit
 * copyright_id_start			1 bit
 * aac_frame_length				13 bits
 * adts_buffer_fullness			11 bits
 * num_raw_data_blocks			2 bits
 *
 * if (protection_absent == 0)
 *	crc_check					16 bits
 */

  程序运行结果

PCM转G711A/G711U

音频编码分为波形编码和参数编码,常见的编码方式(如AAC等)介于两者之间。波形编码直接对声波波形的采样数据进行编码,完全不考虑波形内部的信息,如时域或频域上的冗余。参数编码则只保存描述波形的参数:例如对一个正弦波,不需要知道各个时刻的采样值,只要知道振幅、频率、相位等信息,编码时只保存这些信息,接收方按照这些参数重新建立波形即可播放。G711A/G711U属于波形编码,编码比较简单,只是把每个样本值从PCM的16Bit压缩成8Bit,在安防和电话中有应用。下面的DEMO把PCM编码成G711A,代码如下:

#include <stdio.h>
#include "g711.h"
 
/* PCM file to encode; same file name as OUTPUT_FILE of the WAV-parsing demo. */
#define INPUT_FILE_NAME			 ("huangdun_r48000_FMT_S16_c2.pcm")
/* File that receives the encoded bytes, one per input sample. */
#define OUTPUT_FILE_NAME		 ("huangdun_r48000_FMT_S16_c2.g711a")
/*
 * Demo entry point: read INPUT_FILE_NAME as a sequence of 16-bit samples,
 * pass each one through ALaw_Encode() (declared in g711.h) and write the
 * resulting byte to OUTPUT_FILE_NAME.
 *
 * Returns 0 on success, 1 if the input cannot be opened, -1 if the output
 * cannot be created or written.
 */
int main()
{
	FILE *pInputFile;
	FILE *pOutputFile;
	signed short usSample;
	unsigned char ucG711Sample;
	int iReadCnt = 0;
	int iWriteCnt = 0;

	pInputFile = fopen(INPUT_FILE_NAME, "rb");
	if (pInputFile == NULL)
	{
		/* unable to open file */
		fprintf(stderr, "Error opening file: %s\n", INPUT_FILE_NAME);
		return 1;
	}
	pOutputFile = fopen(OUTPUT_FILE_NAME, "wb+");
	if (pOutputFile == NULL)
	{
		printf("Create g711a file error\n");
		fclose(pInputFile);
		return -1;
	}

	/* Loop on fread()'s return value. With while(!feof()) the body runs
	   once more after the last sample, which encodes a stale value,
	   writes an extra byte and inflates both counters. */
	while (fread(&usSample, sizeof(usSample), 1, pInputFile) == 1)
	{
		iReadCnt += 2;
		ucG711Sample = ALaw_Encode(usSample);
		if (fwrite(&ucG711Sample, 1, 1, pOutputFile) != 1)
		{
			printf("Write g711a file error\n");
			fclose(pInputFile);
			fclose(pOutputFile);
			return -1;
		}
		iWriteCnt += 1;
	}

	fclose(pInputFile);
	fclose(pOutputFile);
	printf("ReadCnt:%d WriteCnt:%d PCM TO G711A Success!!!",iReadCnt,iWriteCnt);
	getchar();

	return 0;
}

  

 

posted @ 2019-04-12 16:30  小菜77  阅读(4004)  评论(0编辑  收藏  举报