windows平台,实现录音功能详解
音频处理分为播放和录音两类。针对这些处理,微软提供了一系列函数,称为Waveform Functions。这篇文章讨论录音功能:先对微软提供的函数做简单说明,再将这些函数封装成C++类,最后进一步封装成C#类。
1 Waveform Functions函数简介
根据录音处理步骤,对这些函数做简单介绍。
1.1 waveInOpen
// Opens a waveform-audio input (recording) device.
MMRESULT waveInOpen(
LPHWAVEIN phwi,               // receives the opened device handle (the wrapper below passes &m_hWaveIn)
UINT uDeviceID,               // device to open (the wrapper below passes WAVE_MAPPER)
LPCWAVEFORMATEX pwfx,         // requested recording format
DWORD_PTR dwCallback,         // callback target (the wrapper below passes the address of WaveInProc)
DWORD_PTR dwCallbackInstance, // user data handed to the callback (the wrapper below passes `this`)
DWORD fdwOpen                 // callback type flag (the wrapper below passes CALLBACK_FUNCTION)
);
pwfx为录音格式。普通对讲录音一般采样频率为8000Hz,位长为16bit,单声道。dwCallback为回调函数的地址,dwCallbackInstance为原样传给回调函数的用户数据。fdwOpen为回调类型,一般采用CALLBACK_FUNCTION,即函数回调方式。当有录音设备打开、关闭、录音完成等事件发生时,系统会调用我们提供的回调函数。
1.2 waveInPrepareHeader,waveInAddBuffer
当录音设备打开后,需要你提供内存区域来存放录音数据。这两个函数就是完成这项功能。waveInPrepareHeader是准备内存,waveInAddBuffer是将内存加入到录音队列。
当录音完毕,会有回调通知,这时我们提供的内存中就存放着录音数据。回调函数是通过waveInOpen函数的dwCallback参数指定的,dwCallbackInstance则是原样传回给回调函数的用户数据(本文中传入类的this指针)。为了保持录音的连续性,录音队列任何时刻都不能为空。录音队列中的内存块个数一般不少于3个,例如初始时准备3个内存块(下文的封装类准备了10个);每当一个内存块录音完毕,队列中的内存块个数就减1,这时要立即补充一个内存块。
1.3 waveInStart,waveInStop
这两个函数分别是启动和停止录音。一切准备完毕后,调用waveInStart,才会开始录音。
1.4 waveInClose
关闭录音。这个函数看起来非常简单,其实不然。关闭过程会引发一系列事件,需要把这些事件处理好,否则会导致内存泄漏。关闭时(下文代码在调用waveInClose之前先调用waveInReset),尚未存放录音的内存块会通过回调通知我们,这时需要将这些内存释放掉。
2 音频函数的c++封装
封装目的就是隐藏细节,提供一种易于使用的调用模式。通过上文可以看到有几个细节难于处理:函数回调、内存块准备、内存释放。本类将这些细节隐藏,对外提供的模式为:
打开录音设备--》启动录音--》不停轮询,读取已录音成功的数据--》关闭录音
上述处理过程完全是线性化的。隐藏了数据准备、函数回调、内存释放等细节。封装类如下:
头文件
#pragma once #include "Mmsystem.h" #include <list> #include <queue> #include "osType.h" class PcmRecord { public: PcmRecord(); ~PcmRecord(); BOOL IsOpen(); void SetRecordDataLen(int len); //每个录音块长度 BOOL Open(int nSamplesPerSec, int wBitsPerSample, int nChannels); void Close(); BOOL WaitRecordedData(int waitMillisecond); int GetRecordData(char* buffer, int bufferLen, int& bufferReadLen, int waitMillisecond = 0); BOOL StartRecord(); BOOL StopRecord(); private: BOOL AddRecordBuffer(); BOOL HaveRecordingBuffer(); void AddToRecording(WAVEHDR *header); void RemoveFromRecording(WAVEHDR *header); void OnRcvRecordData(WAVEHDR * header); void AddToRecorded(WAVEHDR *header); void DelAllRecordData(); void PrepareRecordData(int count); void OnClose(); private: BOOL m_bInClosing; HWAVEIN m_hWaveIn; WAVEFORMATEX m_waveForm; int m_recordBufferLen; std::list<WAVEHDR*> m_listWaveInRecording; CCritical m_recordingLock; std::queue<WAVEHDR*> m_listWaveInRecorded; CCritical m_recordedLock; HANDLE m_recordedDataEvent; static void WaveInProc(HWAVEOUT hwo, UINT uMsg, DWORD dwInstance, DWORD dwParam1, DWORD dwParam2); };
实现文件
#include "stdafx.h"
#include "PcmRecord.h"

// Number of buffers kept queued with the driver at any time.
const int MaxDataCountInRecording = 10;

// Defined outside this file. Presumably releases the WAVEHDR and its lpData
// buffer - confirm against the definition.
// NOTE(review): nothing in this file calls waveInUnprepareHeader before a
// header is freed; verify whether FreeWaveHeader or another place handles it.
void FreeWaveHeader(WAVEHDR *header);

// Creates a closed recorder with a default block size of 1600 bytes.
PcmRecord::PcmRecord()
{
    m_bInClosing = FALSE;   // was left uninitialised before the first Open()
    m_hWaveIn = NULL;
    ZeroMemory(&m_waveForm, sizeof(m_waveForm));
    m_recordBufferLen = 1600;
    // Auto-reset event, initially non-signalled.
    m_recordedDataEvent = CreateEvent(NULL, FALSE, FALSE, L"");
}

// Closes the device and releases the event handle created in the constructor.
PcmRecord::~PcmRecord()
{
    Close();
    if (m_recordedDataEvent != NULL)
    {
        CloseHandle(m_recordedDataEvent);
        m_recordedDataEvent = NULL;
    }
}

// Driver callback. dwInstance is the PcmRecord passed to waveInOpen.
// NOTE(review): PrepareRecordData (reached through OnRcvRecordData) calls
// waveInPrepareHeader / waveInAddBuffer from inside this callback; check this
// against the restrictions documented for waveInProc.
// NOTE(review): the DWORD parameters truncate pointers on 64-bit builds; fix
// together with the declaration in the header.
void PcmRecord::WaveInProc(HWAVEOUT hwo, UINT uMsg, DWORD dwInstance, DWORD dwParam1, DWORD dwParam2)
{
    PcmRecord *record = (PcmRecord*)dwInstance;
    if (record == NULL)
        return;

    // An input device reports WIM_* messages; the WOM_* constants are the
    // output-device equivalents and never match here.
    switch (uMsg)
    {
    case WIM_OPEN:   // device opened
        break;
    case WIM_CLOSE:  // device handle closed
        record->OnClose();
        break;
    case WIM_DATA:   // a buffer was returned by the driver
        record->OnRcvRecordData((WAVEHDR*)dwParam1);
        break;
    default:
        break;
    }
}

// Close notification from the driver: only act when Close() is not already
// the one driving the shutdown.
void PcmRecord::OnClose()
{
    if (!m_bInClosing)
        Close();
}

// A buffer came back from the driver: keep it if it holds data, free it
// otherwise, and top the driver queue up again unless we are shutting down.
void PcmRecord::OnRcvRecordData(WAVEHDR *header)
{
    if (header == NULL)
        return;

    RemoveFromRecording(header);
    if (header->dwBytesRecorded > 0)
    {
        AddToRecorded(header);
    }
    else
    {
        FreeWaveHeader(header);
    }

    if (!m_bInClosing)
    {
        PrepareRecordData(MaxDataCountInRecording);
    }
}

// Queues a filled buffer for the reader and wakes a waiting reader.
void PcmRecord::AddToRecorded(WAVEHDR * header)
{
    {
        CCriticalLock lock(m_recordedLock);
        m_listWaveInRecorded.push(header);
    }
    // Signal outside the lock so the woken reader can take it immediately.
    SetEvent(m_recordedDataEvent);
}

// Frees every recorded buffer that has not been read.
void PcmRecord::DelAllRecordData()
{
    CCriticalLock lock(m_recordedLock);
    while (!m_listWaveInRecorded.empty())
    {
        WAVEHDR *header = m_listWaveInRecorded.front();
        m_listWaveInRecorded.pop();
        FreeWaveHeader(header);
    }
}

// Returns TRUE if recorded data is queued, waiting up to waitMillisecond for
// it to arrive. The lock is never held across the wait.
BOOL PcmRecord::WaitRecordedData(int waitMillisecond)
{
    {
        CCriticalLock lock(m_recordedLock);
        if (!m_listWaveInRecorded.empty())
            return TRUE;
    }

    WaitForSingleObject(m_recordedDataEvent, waitMillisecond);

    {
        CCriticalLock lock(m_recordedLock);
        return !m_listWaveInRecorded.empty();
    }
}

// Copies as many whole recorded blocks as fit into `buffer`.
//   buffer / bufferLen : destination and its capacity in bytes
//   bufferReadLen      : receives the number of bytes copied
//   waitMillisecond    : 0 = non-blocking, otherwise maximum wait for data
// Returns the number of bytes copied (0 when no data is available). A
// blocking call may return 0 before the timeout if the event was still
// signalled from a block that has already been consumed; callers poll again.
int PcmRecord::GetRecordData(char* buffer, int bufferLen, int& bufferReadLen, int waitMillisecond)
{
    bufferReadLen = 0;
    if (buffer == NULL || bufferLen <= 0)
        return 0;

    // Wait without resetting the event first: a reset between "queue is
    // empty" and the wait could discard a signal raised in between and make
    // the call sleep although data is already queued.
    if (waitMillisecond != 0)
        WaitRecordedData(waitMillisecond);

    CCriticalLock lock(m_recordedLock);
    int copied = 0;
    while (!m_listWaveInRecorded.empty())
    {
        WAVEHDR *header = m_listWaveInRecorded.front();
        const int blockLen = (int)header->dwBytesRecorded;
        // Only whole blocks are handed out; stop when the next one does not fit.
        if (bufferLen - copied < blockLen)
            break;

        m_listWaveInRecorded.pop();
        // Append after what was already copied instead of overwriting it.
        memcpy(buffer + copied, header->lpData, blockLen);
        copied += blockLen;
        FreeWaveHeader(header);
    }

    bufferReadLen = copied;
    return copied;
}

// TRUE while a device handle is held.
BOOL PcmRecord::IsOpen()
{
    return m_hWaveIn != NULL;
}

// Opens the default input device for PCM capture and queues the initial
// buffers. Returns FALSE if the device is already open or cannot be opened.
BOOL PcmRecord::Open(int nSamplesPerSec, int wBitsPerSample, int nChannels)
{
    // Opening twice would overwrite (and leak) the current device handle.
    if (IsOpen())
        return FALSE;

    m_waveForm.nSamplesPerSec = nSamplesPerSec;   /* sample rate */
    m_waveForm.wBitsPerSample = wBitsPerSample;   /* sample size */
    m_waveForm.nChannels = nChannels;             /* channels */
    m_waveForm.cbSize = 0;                        /* size of _extra_ info */
    m_waveForm.wFormatTag = WAVE_FORMAT_PCM;
    m_waveForm.nBlockAlign = (m_waveForm.wBitsPerSample * m_waveForm.nChannels) >> 3;
    m_waveForm.nAvgBytesPerSec = m_waveForm.nBlockAlign * m_waveForm.nSamplesPerSec;

    MMRESULT mmres = waveInOpen(&m_hWaveIn, WAVE_MAPPER, &m_waveForm,
                                (DWORD_PTR)WaveInProc, (DWORD_PTR)this, CALLBACK_FUNCTION);
    if (mmres != MMSYSERR_NOERROR)
    {
        m_hWaveIn = NULL;   // keep IsOpen() truthful after a failed open
        return FALSE;
    }

    m_bInClosing = FALSE;
    PrepareRecordData(MaxDataCountInRecording);
    return TRUE;
}

// Sets the size in bytes of the buffers allocated from now on.
void PcmRecord::SetRecordDataLen(int len)
{
    m_recordBufferLen = len;
}

// Starts capturing; FALSE if the driver rejects the call.
BOOL PcmRecord::StartRecord()
{
    MMRESULT mmres = waveInStart(m_hWaveIn);
    return (mmres == MMSYSERR_NOERROR);
}

// Stops capturing; FALSE if the driver rejects the call.
BOOL PcmRecord::StopRecord()
{
    MMRESULT mmres = waveInStop(m_hWaveIn);
    return (mmres == MMSYSERR_NOERROR);
}

// Stops recording, waits (up to about 500 ms) for the driver to hand back
// every queued buffer, closes the device and drops unread data. Safe to call
// on an object that is not open.
void PcmRecord::Close()
{
    m_bInClosing = TRUE;   // stops the callback from re-queuing buffers

    if (m_hWaveIn != NULL)
    {
        waveInReset(m_hWaveIn);

        // The returned buffers arrive through the callback, which removes
        // them from the recording list.
        for (int n = 0; HaveRecordingBuffer() && n < 500; n++)
        {
            Sleep(1);
        }

        waveInClose(m_hWaveIn);
        m_hWaveIn = NULL;
    }

    DelAllRecordData();
}

// TRUE while at least one buffer is still tracked as queued with the driver.
BOOL PcmRecord::HaveRecordingBuffer()
{
    CCriticalLock lock(m_recordingLock);
    return !m_listWaveInRecording.empty();
}

// Tracks a buffer that is about to be queued with the driver.
void PcmRecord::AddToRecording(WAVEHDR *header)
{
    CCriticalLock lock(m_recordingLock);
    m_listWaveInRecording.push_back(header);
}

// Stops tracking a buffer that the driver returned or rejected.
void PcmRecord::RemoveFromRecording(WAVEHDR *header)
{
    CCriticalLock lock(m_recordingLock);
    m_listWaveInRecording.remove(header);
}

// Queues new buffers until `count` are outstanding or one fails to queue.
// AddRecordBuffer takes m_recordingLock again while it is held here, so this
// assumes CCritical may be re-entered by the owning thread - TODO confirm.
void PcmRecord::PrepareRecordData(int count)
{
    if (count <= 0)
        return;

    CCriticalLock lock(m_recordingLock);
    while (m_listWaveInRecording.size() < (size_t)count)
    {
        if (!AddRecordBuffer())
            return;
    }
}

// Allocates one buffer of m_recordBufferLen bytes, prepares it and hands it
// to the driver. Returns FALSE (with everything released) on any failure.
BOOL PcmRecord::AddRecordBuffer()
{
    if (m_recordBufferLen <= 0)
        return FALSE;

    WAVEHDR *header = new WAVEHDR();
    ZeroMemory(header, sizeof(WAVEHDR));
    // The header is also handed back as dwParam1 of the callback.
    header->dwUser = (DWORD_PTR)header;
    header->dwBufferLength = m_recordBufferLen;
    header->lpData = new char[m_recordBufferLen];

    MMRESULT result = waveInPrepareHeader(m_hWaveIn, header, sizeof(WAVEHDR));
    if (result != MMSYSERR_NOERROR)
    {
        FreeWaveHeader(header);
        return FALSE;
    }

    // Track the buffer before queuing it so a callback that fires right away
    // finds it in the list.
    AddToRecording(header);
    result = waveInAddBuffer(m_hWaveIn, header, sizeof(WAVEHDR));
    if (result != MMSYSERR_NOERROR)
    {
        RemoveFromRecording(header);
        FreeWaveHeader(header);
        return FALSE;
    }
    return TRUE;
}
对于读取录音函数的使用特别说明一下。该函数定义如下:
int PcmRecord::GetRecordData(char* buffer, int bufferLen,int& bufferReadLen, int waitMillisecond)
bufferLen的长度不能小于一个内存块的长度。waitMillisecond为等待的毫秒数:当没有录音数据时,该函数最多等待waitMillisecond毫秒;当waitMillisecond为0时,就是非阻塞调用。对于阻塞调用,可以采用独立线程读取;对于非阻塞调用,可以采用定时器方式轮询。
3 音频函数的c#封装
在C++类实现的基础上,进一步封装为C函数,即可供C#调用。这里的关键是把C++类的成员函数封装为C函数。
// Flat API over the PcmRecord class. `handle` is the value returned by
// PcmRecord_CreateHandle (the PcmRecord pointer stored in a 64-bit integer).
// NOTE(review): `int& bufferReadLen` and the default argument below are C++
// only, so this header cannot be consumed by a C compiler as written; no
// function that deletes the instance is declared here either - confirm.

// Creates a PcmRecord instance and returns it as an opaque handle.
LIBPCMPLAY_API int64_t PcmRecord_CreateHandle();
// Forwards to PcmRecord::SetRecordDataLen (size of each recording block).
LIBPCMPLAY_API void PcmRecord_SetRecordDataLen(int64_t handle, int len);
// Forwards to PcmRecord::Open.
LIBPCMPLAY_API BOOL PcmRecord_Open(int64_t handle, int nSamplesPerSec, int wBitsPerSample, int nChannels);
// Forwards to PcmRecord::IsOpen.
LIBPCMPLAY_API BOOL PcmRecord_IsOpen(int64_t handle);
// Forwards to PcmRecord::StartRecord.
LIBPCMPLAY_API BOOL PcmRecord_Start(int64_t handle);
// Forwards to PcmRecord::StopRecord.
LIBPCMPLAY_API BOOL PcmRecord_Stop(int64_t handle);
// Forwards to PcmRecord::GetRecordData; bufferReadLen receives the byte count.
LIBPCMPLAY_API int PcmRecord_GetRecordData(int64_t handle, char* buffer, int bufferLen, int& bufferReadLen, int waitMillisecond = 0);
// Forwards to PcmRecord::Close.
LIBPCMPLAY_API void PcmRecord_Close(int64_t handle);
PcmRecord_CreateHandle 就是生成一个PcmRecord的实例,并将该实例的指针返回。将handle定义为64位,这样在32位、64位平台下处理方式就完全一样。handle就是类指针,这样就将复杂的类函数隐藏了。
实现函数
int64_t PcmPlay_CreateHandle() { CPcmPlay *play = new CPcmPlay(); return (int64_t)play; } BOOL PcmPlay_IsOpen(int64_t handle) { CPcmPlay *play = (CPcmPlay*)handle; return play->IsOpen(); } BOOL PcmPlay_Open(int64_t handle, int nSamplesPerSec, int wBitsPerSample, int nChannels) { CPcmPlay *play = (CPcmPlay*)handle; return play->Open(nSamplesPerSec, wBitsPerSample, nChannels); } BOOL PcmPlay_SetVolume(int64_t handle, int volume) { CPcmPlay *play = (CPcmPlay*)handle; return play->SetVolume(volume); } int PcmPlay_Play(int64_t handle, char* block, int size) { CPcmPlay *play = (CPcmPlay*)handle; return play->Play(block, size); } void PcmPlay_StopPlay(int64_t handle) { CPcmPlay *play = (CPcmPlay*)handle; play->StopPlay(); } BOOL PcmPlay_IsOnPlay(int64_t handle) { CPcmPlay *play = (CPcmPlay*)handle; return play->IsOnPlay(); } int64_t PcmPlay_GetLeftPlaySpan(int64_t handle) { CPcmPlay *play = (CPcmPlay*)handle; return play->GetLeftPlaySpan(); } int64_t PcmPlay_GetCurPlaySpan(int64_t handle) { CPcmPlay *play = (CPcmPlay*)handle; return play->GetCurPlaySpan(); } void PcmPlay_Close(int64_t handle) { CPcmPlay *play = (CPcmPlay*)handle; play->Close(); } void PcmPlay_CloseHandle(int64_t handle) { CPcmPlay *play = (CPcmPlay*)handle; play->Close(); delete play; } //录音 int64_t PcmRecord_CreateHandle() { PcmRecord *record = new PcmRecord(); return (int64_t)record; } void PcmRecord_SetRecordDataLen(int64_t handle, int len) { PcmRecord *record = (PcmRecord*)handle; record->SetRecordDataLen(len); } BOOL PcmRecord_Open(int64_t handle, int nSamplesPerSec, int wBitsPerSample, int nChannels) { PcmRecord *record = (PcmRecord*)handle; return record->Open(nSamplesPerSec,wBitsPerSample,nChannels); } BOOL PcmRecord_IsOpen(int64_t handle) { PcmRecord *record = (PcmRecord*)handle; return record->IsOpen(); } BOOL PcmRecord_Start(int64_t handle) { PcmRecord *record = (PcmRecord*)handle; return record->StartRecord(); } BOOL PcmRecord_Stop(int64_t handle) { PcmRecord *record = (PcmRecord*)handle; 
return record->StopRecord(); } int PcmRecord_GetRecordData(int64_t handle, char* buffer, int bufferLen, int& bufferReadLen, int waitMillisecond) { PcmRecord *record = (PcmRecord*)handle; return record->GetRecordData(buffer, bufferLen, bufferReadLen, waitMillisecond); } void PcmRecord_Close(int64_t handle) { PcmRecord *record = (PcmRecord*)handle; record->Close(); }
实现了对c语言的封装,下一步就是在c语言的基础上,封装成c#类。
/// <summary>
/// Managed wrapper around the native PcmRecord exported by LibPcmPlay.dll.
/// Usage: Open -> Start -> poll GetPcmData -> Close.
/// </summary>
class PcmRecord
{
    // Native PcmRecord instance. Created on the first Open() and reused for
    // later Open() calls: the native library exposes no function to delete
    // it, so creating a new one per Open() would leak one instance each time.
    long _handle = 0;

    int recordTimespan = 320; // length of each recording block, in milliseconds
    int bufferLenPerSample;   // size of each recording block, in bytes

    public PcmRecord()
    {
        // 8000 Hz * 16 bit * mono = 16 bytes per millisecond.
        bufferLenPerSample = 16 * recordTimespan;
    }

    ~PcmRecord()
    {
        if (_handle != 0)
        {
            PcmRecordWrapper.PcmRecord_Close(_handle);
        }
    }

    bool _isOpen = false;

    /// <summary>True after a successful Open() and until Close().</summary>
    public bool IsOpen => _isOpen;

    /// <summary>
    /// Opens the recording device (8000 Hz, 16 bit, mono).
    /// </summary>
    /// <returns>true if the native device was opened, false otherwise.</returns>
    /// <exception cref="Exception">The recorder is already open.</exception>
    public bool Open()
    {
        if (_isOpen)
            throw new Exception("先关闭,再打开!");

        if (_handle == 0)
            _handle = PcmRecordWrapper.PcmRecord_CreateHandle();
        if (_handle == 0)
            return false;

        PcmRecordWrapper.PcmRecord_SetRecordDataLen(_handle, bufferLenPerSample);

        // A native BOOL is "true" for any non-zero value; report the real
        // outcome instead of always returning true.
        _isOpen = PcmRecordWrapper.PcmRecord_Open(_handle, 8000, 16, 1) != 0;
        return _isOpen;
    }

    /// <summary>Starts capturing.</summary>
    /// <exception cref="Exception">The recorder is not open.</exception>
    public bool Start()
    {
        if (!_isOpen)
            throw new Exception("录音设备还没打开!");

        return PcmRecordWrapper.PcmRecord_Start(_handle) != 0;
    }

    /// <summary>Stops capturing.</summary>
    /// <exception cref="Exception">The recorder is not open.</exception>
    public bool Stop()
    {
        if (!_isOpen)
            throw new Exception("录音设备还没打开!");

        return PcmRecordWrapper.PcmRecord_Stop(_handle) != 0;
    }

    /// <summary>
    /// Reads recorded PCM data.
    /// </summary>
    /// <param name="waitMillisecond">Maximum time to wait for data; 0 does not block.</param>
    /// <returns>The recorded bytes, or null when no data is available.</returns>
    /// <exception cref="Exception">The recorder is not open.</exception>
    public byte[] GetPcmData(int waitMillisecond)
    {
        if (!_isOpen)
            throw new Exception("录音设备还没打开!");

        byte[] bufferRecord = new byte[bufferLenPerSample];
        byte[] readLen = new byte[4];

        // Pin both arrays for the duration of the native call and release the
        // pins even if the call throws.
        GCHandle hinBuffer = GCHandle.Alloc(bufferRecord, GCHandleType.Pinned);
        try
        {
            GCHandle hinReadLen = GCHandle.Alloc(readLen, GCHandleType.Pinned);
            try
            {
                PcmRecordWrapper.PcmRecord_GetRecordData(
                    _handle,
                    hinBuffer.AddrOfPinnedObject(),
                    bufferRecord.Length,
                    hinReadLen.AddrOfPinnedObject(),
                    waitMillisecond);
            }
            finally
            {
                hinReadLen.Free();
            }
        }
        finally
        {
            hinBuffer.Free();
        }

        int returnLen = BitConverter.ToInt32(readLen, 0);
        if (returnLen <= 0)
            return null;

        if (returnLen >= bufferRecord.Length)
            return bufferRecord;

        Array.Resize(ref bufferRecord, returnLen);
        return bufferRecord;
    }

    /// <summary>Closes the recording device; does nothing when not open.</summary>
    public void Close()
    {
        if (!_isOpen)
            return;

        PcmRecordWrapper.PcmRecord_Close(_handle);
        // _handle is kept so that a later Open() reuses the native instance.
        _isOpen = false;
    }
}

/// <summary>P/Invoke declarations for the recording functions of LibPcmPlay.dll.</summary>
public class PcmRecordWrapper
{
    private const string DLLName = "LibPcmPlay.dll";

    [DllImport(DLLName, EntryPoint = "PcmRecord_CreateHandle", CallingConvention = CallingConvention.Cdecl)]
    public static extern long PcmRecord_CreateHandle();

    [DllImport(DLLName, EntryPoint = "PcmRecord_SetRecordDataLen", CallingConvention = CallingConvention.Cdecl)]
    public static extern void PcmRecord_SetRecordDataLen(long handle, int len);

    [DllImport(DLLName, EntryPoint = "PcmRecord_Open", CallingConvention = CallingConvention.Cdecl)]
    public static extern int PcmRecord_Open(long handle, int nSamplesPerSec, int wBitsPerSample, int nChannels);

    [DllImport(DLLName, EntryPoint = "PcmRecord_IsOpen", CallingConvention = CallingConvention.Cdecl)]
    public static extern int PcmRecord_IsOpen(long handle);

    [DllImport(DLLName, EntryPoint = "PcmRecord_Start", CallingConvention = CallingConvention.Cdecl)]
    public static extern int PcmRecord_Start(long handl);

    [DllImport(DLLName, EntryPoint = "PcmRecord_Stop", CallingConvention = CallingConvention.Cdecl)]
    public static extern int PcmRecord_Stop(long handl);

    // bufferReadLen points at a 4-byte integer that receives the byte count.
    [DllImport(DLLName, EntryPoint = "PcmRecord_GetRecordData", CallingConvention = CallingConvention.Cdecl)]
    public static extern int PcmRecord_GetRecordData(long handle, IntPtr buffer, Int32 bufferLen, IntPtr bufferReadLen, int waitMillisecond);

    [DllImport(DLLName, EntryPoint = "PcmRecord_Close", CallingConvention = CallingConvention.Cdecl)]
    public static extern void PcmRecord_Close(long handl);
}
} // presumably closes an enclosing namespace opened outside this excerpt
总结:windows平台为我们提供了录音相关函数。平台提供的函数考虑到各种应用场景,使用起来非常灵活,但是容易出错,需要关注细节。合适的才是最好的。根据自身的需求,选用一种合适的处理模式;这种模式既能满足我们的功能需求,又要易于使用。本文不仅提供了对录音函数的封装,也提供一种处理复杂问题的思路。希望能抛砖引玉!
专注C#、C++。擅长WPF、WinForm、QT等技术。
研究ofd多年,开发了一系列产品。
技术交流QQ群:565438497。