Problems encountered when integrating Baidu speech recognition

The SDK makes plain HTTP requests, so HTTP must be allowed (on iOS this means adding an App Transport Security exception in Info.plist).
The APPID, API key, and secret key must be configured correctly.
The ASR folder containing the .dat model files must be imported into the project.
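The configuration code further down references the macros API_KEY, SECRET_KEY, and APP_ID without showing where they come from. A minimal sketch of how they might be defined (the values are hypothetical placeholders; substitute the credentials from your own Baidu console):

// Hypothetical credentials header; the values below are placeholders.
#define APP_ID      @"12345678"
#define API_KEY     @"your-api-key"
#define SECRET_KEY  @"your-secret-key"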
The author's glue code (real-time streaming recognition):
#import <Foundation/Foundation.h>
#import "AudioInputStream.h"
#import "BDSASRDefines.h"
#import "BDSASRParameters.h"
#import "BDSWakeupDefines.h"
#import "BDSWakeupParameters.h"
#import "BDSEventManager.h"
#import "BDVRSettings.h"

@interface SpeechStreamHelper : NSObject <BDSClientASRDelegate>

// Invoked with the recognized text, its start position, and its duration.
@property (nonatomic, copy) void (^haveRecognizerSpeakTextBlock)(NSString *text, NSTimeInterval startLocation, NSTimeInterval length);

// Start streaming recognition
- (void)startAudioStream;
- (void)pauseAudioStream;
- (void)stopAudioStream;
// Feed captured PCM data into the recognizer
- (void)haveHandlePCMData:(NSData *)data duration:(NSTimeInterval)duration volume:(float)volume;

@end
#import "SpeechStreamHelper.h" @interface SpeechStreamHelper() @property (strong, nonatomic) BDSEventManager *asrEventManager; @property (nonatomic,assign)BOOL canStartStream; @property (nonatomic,assign)BOOL shouldStartStream; @property(nonatomic,strong)AudioInputStream *audioStream; @property(nonatomic,assign)NSTimeInterval outsideDuration; @property(nonatomic,assign)NSTimeInterval insideDuration; @end @implementation SpeechStreamHelper -(instancetype)init { self = [super init]; if (self) { self.canStartStream = YES; self.shouldStartStream = NO; [self configurationManager]; } return self; } -(void)configurationManager { self.asrEventManager = [BDSEventManager createEventManagerWithName:BDS_ASR_NAME]; [[BDVRSettings getInstance] configBDVRClient]; [self configVoiceRecognitionClient]; [self.asrEventManager setDelegate:self]; } //开始流识别 - (void)startAudioStream { [self startAudioStream:0]; } //开始流识别 - (void)startAudioStream:(NSTimeInterval)duration { self.shouldStartStream = YES; if (self.audioStream == nil && self.canStartStream) { self.audioStream = [[AudioInputStream alloc] init]; self.outsideDuration = duration; self.insideDuration = CFAbsoluteTimeGetCurrent(); [self.asrEventManager setParameter:self.audioStream forKey:BDS_ASR_AUDIO_INPUT_STREAM]; [self.asrEventManager setParameter:@"" forKey:BDS_ASR_AUDIO_FILE_PATH]; [self.asrEventManager sendCommand:BDS_ASR_CMD_START]; } } - (void)pauseAudioStream { self.shouldStartStream = NO; [self onRequestEnd]; } -(void)stopAudioStream { self.canStartStream = NO; [self pauseAudioStream]; [self.asrEventManager setDelegate:nil]; self.asrEventManager = nil; } - (void)onRequestEnd { if (self.audioStream) { [self.audioStream close]; self.audioStream = nil; } [self.asrEventManager sendCommand:BDS_ASR_CMD_STOP]; } -(void)haveHandlePCMData:(NSData*)data duration:(NSTimeInterval)duration volume:(float)volume { if (self.shouldStartStream && self.audioStream != nil) { [self.audioStream haveHandlePCMData:data]; } else if(self.shouldStartStream && volume > 26) { [self startAudioStream:duration]; } } #pragma mark - MVoiceRecognitionClientDelegate - (void)VoiceRecognitionClientWorkStatus:(int)workStatus obj:(id)aObj { switch (workStatus) { case EVoiceRecognitionClientWorkStatusNewRecordData: { { } break; } case EVoiceRecognitionClientWorkStatusStartWorkIng: { { } break; } case EVoiceRecognitionClientWorkStatusStart: { { } break; } case EVoiceRecognitionClientWorkStatusEnd: { { } break; } case EVoiceRecognitionClientWorkStatusFlushData: { { } break; } case EVoiceRecognitionClientWorkStatusFinish: { { NSString *data = [self parseResultFromDic:aObj]; if (data.length > 0) { CFAbsoluteTime currentTime = CFAbsoluteTimeGetCurrent(); NSTimeInterval duration = currentTime - self.insideDuration; if (self.haveRecognizerSpeakTextBlock) { self.haveRecognizerSpeakTextBlock(data, self.outsideDuration, duration); } } // NSLog(@"语音结果:%@ (%.2f -> %.2f) %.2f",data,self.outsideDuration,self.outsideDuration+duration,duration); [self onRequestEnd]; } break; } case EVoiceRecognitionClientWorkStatusMeterLevel: { break; } case EVoiceRecognitionClientWorkStatusCancel: { { [self onRequestEnd]; } break; } case EVoiceRecognitionClientWorkStatusError: { { [self onRequestEnd]; } break; } case EVoiceRecognitionClientWorkStatusLoaded: { { } break; } case EVoiceRecognitionClientWorkStatusUnLoaded: { { } break; } case EVoiceRecognitionClientWorkStatusChunkThirdData: { { } break; } case EVoiceRecognitionClientWorkStatusChunkNlu: { { printf("当前结果:\n"); } break; } case 
EVoiceRecognitionClientWorkStatusChunkEnd: { { //解析结果出现 } break; } case EVoiceRecognitionClientWorkStatusFeedback: { { } break; } case EVoiceRecognitionClientWorkStatusRecorderEnd: { { } break; } case EVoiceRecognitionClientWorkStatusLongSpeechEnd: { { } break; } default: break; } } - (NSString *)parseResultFromDic:(NSDictionary *)resultDict { NSArray *results_recognition = [resultDict valueForKey:@"results_recognition"]; if (results_recognition && [results_recognition isKindOfClass:[NSArray class]] && results_recognition.count > 0) { return [NSString stringWithFormat:@"%@",results_recognition[0]]; } return @""; } #pragma mark - Private: Configuration - (void)configVoiceRecognitionClient { //设置DEBUG_LOG的级别 // [self.asrEventManager setParameter:@(EVRDebugLogLevelTrace) forKey:BDS_ASR_DEBUG_LOG_LEVEL]; [self.asrEventManager setParameter:@(EVRDebugLogLevelOff) forKey:BDS_ASR_DEBUG_LOG_LEVEL]; //配置API_KEY 和 SECRET_KEY 和 APP_ID [self.asrEventManager setParameter:@[API_KEY, SECRET_KEY] forKey:BDS_ASR_API_SECRET_KEYS]; [self.asrEventManager setParameter:APP_ID forKey:BDS_ASR_OFFLINE_APP_CODE]; //配置端点检测(二选一) [self configModelVAD]; // [self configDNNMFE]; // [self.asrEventManager setParameter:@"15361" forKey:BDS_ASR_PRODUCT_ID]; // ---- 语义与标点 ----- [self enableNLU]; // [self enablePunctuation]; // ------------------------ } - (void) enableNLU { // ---- 开启语义理解 ----- [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_NLU]; [self.asrEventManager setParameter:@"1536" forKey:BDS_ASR_PRODUCT_ID]; } - (void) enablePunctuation { // ---- 开启标点输出 ----- [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_DISABLE_PUNCTUATION]; // 普通话标点 // [self.asrEventManager setParameter:@"1537" forKey:BDS_ASR_PRODUCT_ID]; // 英文标点 [self.asrEventManager setParameter:@"1737" forKey:BDS_ASR_PRODUCT_ID]; } - (void)configModelVAD { NSString *modelVAD_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_basic_model" ofType:@"dat"]; [self.asrEventManager setParameter:modelVAD_filepath forKey:BDS_ASR_MODEL_VAD_DAT_FILE]; [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_MODEL_VAD]; #pragma mark 下面这个模式来的不知道是否有效 [self.asrEventManager setParameter:@(0.1f) forKey:BDS_ASR_MFE_MAX_SPEECH_PAUSE]; [self.asrEventManager setParameter:@(2.f) forKey:BDS_ASR_MFE_MAX_WAIT_DURATION]; } - (void)configDNNMFE { NSString *mfe_dnn_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_dnn" ofType:@"dat"]; [self.asrEventManager setParameter:mfe_dnn_filepath forKey:BDS_ASR_MFE_DNN_DAT_FILE]; NSString *cmvn_dnn_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_cmvn" ofType:@"dat"]; [self.asrEventManager setParameter:cmvn_dnn_filepath forKey:BDS_ASR_MFE_CMVN_DAT_FILE]; [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_ENABLE_MODEL_VAD]; // MFE支持自定义静音时长 // [self.asrEventManager setParameter:@(200.f) forKey:BDS_ASR_MFE_MAX_SPEECH_PAUSE]; // [self.asrEventManager setParameter:@(400.f) forKey:BDS_ASR_MFE_MAX_WAIT_DURATION]; } @end
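As implemented, a recognition session tears itself down: Finish, Error, and Cancel all funnel into onRequestEnd, which closes the audio stream, and the next PCM buffer whose volume exceeds 26 opens a fresh session via startAudioStream:. A sketch of how a caller might drive the helper (the capture callback is assumed to exist in your own recording code):

SpeechStreamHelper *speechHelper = [[SpeechStreamHelper alloc] init];
speechHelper.haveRecognizerSpeakTextBlock = ^(NSString *text, NSTimeInterval startLocation, NSTimeInterval length) {
    NSLog(@"Recognized \"%@\" starting at %.2fs, lasting %.2fs", text, startLocation, length);
};
[speechHelper startAudioStream];

// In your audio capture callback (hypothetical), forward each PCM buffer:
// [speechHelper haveHandlePCMData:pcmData duration:bufferStartTime volume:bufferVolume];

// When recognition is no longer needed:
// [speechHelper stopAudioStream];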
The Baidu demo code, adapted:
#import <Foundation/Foundation.h>

@interface AudioInputStream : NSInputStream

- (void)haveHandlePCMData:(NSData *)data;

@end
#import "AudioInputStream.h" #import <AudioToolbox/AudioToolbox.h> #import <AVFoundation/AVAudioSession.h> #include "AudioDataQueue.hpp" @interface AudioInputStream () { BOOL isRecording; AudioDataQueue *audioData; } // Developer should set the status depens on your data flow. @property (nonatomic, assign) NSStreamStatus status; @end @implementation AudioInputStream @synthesize delegate; - (instancetype)init { if (self = [super init]) { _status = NSStreamStatusNotOpen; isRecording = false; } return self; } - (void)open { /* ** any operation to open data source, do it here. */ [self startRecording]; } - (void)close { /* ** clean up the data source. */ [self stopRecorder]; } #pragma mark - Custom - (BOOL)hasBytesAvailable; { return YES; } - (NSStreamStatus)streamStatus; { return self.status; } - (NSInteger)read:(uint8_t *)buffer maxLength:(NSUInteger)len { @synchronized (self) { if (audioData == NULL || !isRecording) { return 0; } else { int dataLength = audioData->dequeSamples(buffer, (int)len, true); return dataLength; } } } - (BOOL)getBuffer:(uint8_t * _Nullable *)buffer length:(NSUInteger *)len { return NO; } #pragma mark - Data Source - (void)stopRecorder { if (!isRecording) { return; } isRecording = false; @synchronized(self) { delete audioData; } } - (void)startRecording { [self clearupRecording]; isRecording = YES; } - (void)clearupRecording { audioData = new AudioDataQueue(16000*2*2); audioData->reset(); } #pragma mark - Static callback -(void)haveHandlePCMData:(NSData*)data { if (data.length > 0) { @synchronized (self) { if (isRecording) { audioData->queueAudio((const uint8_t *)data.bytes, (int)data.length); } } } } @end
The data buffer queue written by Baidu (a fixed-capacity ring buffer):
#ifndef AudioDataQueue_hpp
#define AudioDataQueue_hpp

#include <stdint.h>

// Fixed-capacity ring buffer for raw audio bytes.
// When it overflows, the oldest data is overwritten.
class AudioDataQueue {
public:
    AudioDataQueue(int bufferCapacity = 0);
    int queueAudio(const uint8_t* audioData, int dataLength);
    int dequeSamples(uint8_t* dataBuffer, int bufferSize, bool dequeRemaining);
    bool haveData();
    void reset();
    ~AudioDataQueue();

private:
    uint8_t* mData;          // start of the backing buffer
    int mDataLength;         // number of valid bytes currently queued
    int mBufferCapacity;     // total size of the backing buffer
    uint8_t* mLoopStart;     // read position
    uint8_t* mLoopEnd;       // write position
    uint8_t* mDataEnd;       // one past the end of the backing buffer
};

#endif /* AudioDataQueue_hpp */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "AudioDataQueue.hpp"

int AudioDataQueue::queueAudio(const uint8_t* audioData, int dataLength)
{
    if (dataLength == 0) return mDataLength;

    // If the input alone exceeds the capacity, keep only its newest bytes.
    if (dataLength > mBufferCapacity) {
        audioData += (dataLength - mBufferCapacity);
        dataLength = mBufferCapacity;
    }

    // Copy up to the physical end of the buffer, then wrap around.
    long remainingLen = mDataEnd - mLoopEnd;
    long rightLen = remainingLen >= dataLength ? dataLength : remainingLen;
    memcpy(mLoopEnd, audioData, rightLen);
    mLoopEnd += rightLen;
    if (mLoopEnd == mDataEnd) {
        mLoopEnd = mData;
    }
    long leftLen = dataLength > rightLen ? dataLength - rightLen : 0;
    if (leftLen > 0) {
        memcpy(mLoopEnd, audioData + rightLen, leftLen);
        mLoopEnd += leftLen;
    }

    mDataLength += dataLength;
    if (mDataLength >= mBufferCapacity) {
        // Overflow: the oldest data was overwritten, so the read
        // position moves to just past the newest byte.
        mDataLength = mBufferCapacity;
        mLoopStart = mLoopEnd;
    }
    return mDataLength;
}

int AudioDataQueue::dequeSamples(uint8_t* dataBuffer, int bufferSize, bool dequeRemaining)
{
    // Only deliver when a full buffer is available, unless the caller
    // accepts whatever remains (dequeRemaining).
    if (mDataLength >= bufferSize || dequeRemaining) {
        long tmp = mDataEnd - mLoopStart;
        long dataRightLen = tmp >= mDataLength ? mDataLength : tmp;
        long rightLen = dataRightLen >= bufferSize ? bufferSize : dataRightLen;
        memcpy(dataBuffer, mLoopStart, rightLen);
        mLoopStart += rightLen;
        if (mLoopStart == mDataEnd) {
            mLoopStart = mData;
        }
        long leftLen = 0;
        long left = bufferSize - rightLen;
        if (left > 0) {
            // The queued data wraps around; copy the part at the buffer start.
            long dataLeftLen = mDataLength > dataRightLen ? mDataLength - dataRightLen : 0;
            leftLen = dataLeftLen >= left ? left : dataLeftLen;
            memcpy(dataBuffer + rightLen, mLoopStart, leftLen);
            mLoopStart += leftLen;
        }
        mDataLength -= bufferSize;
        if (mDataLength <= 0) {
            mDataLength = 0;
            mLoopStart = mLoopEnd = mData;
        }
        return (int)(rightLen + leftLen);
    }
    return 0;
}

bool AudioDataQueue::haveData()
{
    return (mDataLength > 0);
}

void AudioDataQueue::reset()
{
    mDataLength = 0;
    mDataEnd = mData + mBufferCapacity;
    mLoopStart = mLoopEnd = mData;
}

AudioDataQueue::AudioDataQueue(int bufferCapacity)
{
    mDataLength = 0;
    mBufferCapacity = bufferCapacity;
    mData = (uint8_t*)malloc(mBufferCapacity);
    mDataEnd = mData + mBufferCapacity;
    mLoopStart = mLoopEnd = mData;
}

AudioDataQueue::~AudioDataQueue()
{
    if (mData) {
        free(mData);
        mData = NULL;
        mDataEnd = NULL;
        mLoopStart = NULL;
        mLoopEnd = NULL;
    }
    mDataLength = 0;
    mBufferCapacity = 0;
}
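A minimal sketch exercising the queue on its own, with illustrative sizes (one 20 ms buffer of 16 kHz, 16-bit mono audio is 640 bytes):

#include <stdio.h>
#include "AudioDataQueue.hpp"

int main() {
    AudioDataQueue queue(32000);          // capacity: 1 second of 16 kHz, 16-bit mono

    uint8_t chunk[640] = {0};             // one 20 ms capture buffer (silence)
    queue.queueAudio(chunk, sizeof(chunk));

    uint8_t out[1024];
    // dequeRemaining=true: return whatever is buffered, even if less than requested.
    int n = queue.dequeSamples(out, sizeof(out), true);
    printf("dequeued %d bytes\n", n);     // prints: dequeued 640 bytes
    return 0;
}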