iOS中 语音识别功能/语音转文字教程详解 韩俊强的博客
原文地址:http://blog.csdn.net/qq_31810357/article/details/51111702
前言:最近研究了一下语音识别,从百度语音识别到讯飞语音识别;首先说一下个人对两者的看法:讯飞毫无疑问比较专业,识别率也很高,针对语音识别是比较精准的,但是很多开发者和我一样期望离线识别,而讯飞离线是收费的;就请求次数来讲,两者都可以申请高配额,针对用户较多的情况几乎都一样。
基于免费而且支持离线,我选择了百度离线语音识别。它比较简单,只是UI设计多一点,下面写一下教程:
1.首先:需要的库
2.我用的是自定义UI,所以以功能实现为主(头文件)
// 头文件 #import "BDVRCustomRecognitonViewController.h" #import "BDVRClientUIManager.h" #import "WBVoiceRecordHUD.h" #import "BDVRViewController.h" #import "MyViewController.h" #import "BDVRSConfig.h"
3.需要了解的功能,能用到的如下:
//------------------- Class methods ------------------------
// Create the voice-recognition client object; the object is a singleton.
+ (BDVoiceRecognitionClient *)sharedInstance;
// Release the voice-recognition client object.
+ (void)releaseInstance;

//------------------- Recognition methods -----------------------
// Check whether recording is possible.
- (BOOL)isCanRecorder;
// Start voice recognition. The caller must implement the MVoiceRecognitionClientDelegate
// methods and pass in the implementing object so it can listen for events.
// For the return value, see TVoiceRecognitionStartWorkResult.
- (int)startVoiceRecognition:(id<MVoiceRecognitionClientDelegate>)aDelegate;
// "Done speaking": call this when the user actively finishes recording.
- (void)speakFinish;
// End the current voice-recognition session.
- (void)stopVoiceRecognition;
/**
 * @brief Get the sample rate of the current recognition.
 *
 * @return The sample rate (16000/8000).
 */
- (int)getCurrentSampleRate;
/**
 * @brief Get the current recognition mode (deprecated).
 *
 * @return The current recognition mode.
 */
- (int)getCurrentVoiceRecognitionMode __attribute__((deprecated));
/**
 * @brief Set the current recognition mode (deprecated).
 *        Use -(void)setProperty:(TBDVoiceRecognitionProperty)property; instead.
 *
 * @param aMode The recognition mode.
 *
 * @return The original comment says "whether the mode was set successfully",
 *         but the method is declared void, so nothing is actually returned.
 */
- (void)setCurrentVoiceRecognitionMode:(int)aMode __attribute__((deprecated));
// Set the recognition type.
- (void)setProperty:(TBDVoiceRecognitionProperty)property __attribute__((deprecated));
// Get the current recognition type.
- (int)getRecognitionProperty __attribute__((deprecated));
// Set the list of recognition types. Except for EVoiceRecognitionPropertyInput and
// EVoiceRecognitionPropertySong, recognition types can be combined.
- (void)setPropertyList: (NSArray*)prop_list;
// cityID only takes effect for the EVoiceRecognitionPropertyMap recognition type.
- (void)setCityID: (NSInteger)cityID;
// Get the current list of recognition types.
- (NSArray*)getRecognitionPropertyList;

//------------------- Prompt tones -----------------------
// Enable/disable a prompt tone. By default the recording-start and recording-end
// tones are played.
// Resources live in BDVoiceRecognitionClientResources/Tone:
//   record_start.caf   recording-start sound file
//   record_end.caf     recording-end sound file
// The sound resources must be added to the project. They may be replaced, but the
// file names must not change; tones should be short, about 0.5 seconds.
// For aTone values see TVoiceRecognitionPlayTones. Returns NO if the file is not found.
- (BOOL)setPlayTone:(int)aTone isPlay:(BOOL)aIsPlay;
4.录音按钮相关动画(我自定义的,大家可以借鉴)
// Record-button related declarations.
@property (nonatomic, weak, readonly) UIButton *holdDownButton;// the "hold to talk" button
/**
 * Whether recording has been cancelled.
 */
@property (nonatomic, assign, readwrite) BOOL isCancelled;
/**
 * Whether recording is currently in progress.
 */
@property (nonatomic, assign, readwrite) BOOL isRecording;
/**
 * Fired when the record button is pressed down; recording starts at this point.
 */
- (void)holdDownButtonTouchDown;
/**
 * Fired when the finger leaves the screen outside the record button's bounds;
 * recording is cancelled at this point.
 */
- (void)holdDownButtonTouchUpOutside;
/**
 * Fired when the finger leaves the screen inside the record button's bounds;
 * recording is completed at this point.
 */
- (void)holdDownButtonTouchUpInside;
/**
 * Fired when the finger slides outside the record button's bounds.
 */
- (void)holdDownDragOutside;
5.初始化系统UI
#pragma mark - layout subViews UI
/**
 * Create a button object from a normal-state image and a highlighted-state image.
 *
 * @param image   Image shown in the normal state.
 * @param hlImage Image shown in the highlighted state.
 *
 * @return The button object.
 */
- (UIButton *)createButtonWithImage:(UIImage *)image HLImage:(UIImage *)hlImage ;
- (void)holdDownDragInside;
- (void)createInitView; // create the initial screen; used while the prompt tone plays
- (void)createRecordView; // create the recording screen
- (void)createRecognitionView; // create the recognition screen
- (void)createErrorViewWithErrorType:(int)aStatus; // show detailed error info in the recognition view
- (void)createRunLogWithStatus:(int)aStatus; // show detailed status info in the status view
- (void)finishRecord:(id)sender; // the user tapped "done"
- (void)cancel:(id)sender; // the user tapped "cancel"
- (void)startVoiceLevelMeterTimer;
- (void)freeVoiceLevelMeterTimerTimer;
6.最重要的部分
// Recording finished: the user is done speaking.
[[BDVoiceRecognitionClient sharedInstance] speakFinish];
// Cancel recording: end the current recognition session.
[[BDVoiceRecognitionClient sharedInstance] stopVoiceRecognition];
7.两个代理方法
/**
 * MVoiceRecognitionClientDelegate callback: the recognizer's work status changed.
 *
 * @param aStatus The new work status (one of the EVoiceRecognitionClientWork... values
 *                handled below).
 * @param aObj    Payload for the status. For FlushData the first element is read as the
 *                intermediate text; for Finish in non-input mode it is iterated as an
 *                array of candidate strings; in input mode it is handed unchanged to
 *                -[BDVRSConfig composeInputModeResult:].
 */
- (void)VoiceRecognitionClientWorkStatus:(int)aStatus obj:(id)aObj
{
    // Tag used to find and reuse the intermediate-result label, so repeated FlushData
    // callbacks update one label instead of stacking a new one on every callback.
    static const NSInteger kFlushLabelTag = 51111702;

    switch (aStatus)
    {
        case EVoiceRecognitionClientWorkStatusFlushData: // intermediate result, shown continuously
        {
            // -firstObject is nil-safe; -objectAtIndex:0 throws on an empty array.
            NSString *text = nil;
            if ([aObj isKindOfClass:[NSArray class]])
            {
                id first = [(NSArray *)aObj firstObject];
                if ([first isKindOfClass:[NSString class]])
                {
                    text = first;
                }
            }

            if ([text length] > 0)
            {
                // [clientSampleViewController logOutToContinusManualResut:text];
                UILabel *clientWorkStatusFlushLabel = (UILabel *)[self.view viewWithTag:kFlushLabelTag];
                if (![clientWorkStatusFlushLabel isKindOfClass:[UILabel class]])
                {
                    clientWorkStatusFlushLabel = [[UILabel alloc] initWithFrame:CGRectMake(kScreenWidth/2 - 100, 64, 200, 60)];
                    clientWorkStatusFlushLabel.tag = kFlushLabelTag;
                    clientWorkStatusFlushLabel.textAlignment = NSTextAlignmentCenter;
                    clientWorkStatusFlushLabel.font = [UIFont systemFontOfSize:18.0f];
                    clientWorkStatusFlushLabel.numberOfLines = 0;
                    clientWorkStatusFlushLabel.backgroundColor = [UIColor whiteColor];
                    [self.view addSubview:clientWorkStatusFlushLabel];
                }
                clientWorkStatusFlushLabel.text = text;
                // Keep the label on top, as a freshly added label would have been.
                [self.view bringSubviewToFront:clientWorkStatusFlushLabel];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusFinish: // recognition completed normally with a result
        {
            [self createRunLogWithStatus:aStatus];

            if ([[BDVoiceRecognitionClient sharedInstance] getRecognitionProperty] != EVoiceRecognitionPropertyInput)
            {
                // In search mode the result is an array, for example:
                // ["公园", "公元"]
                NSArray *audioResultData = [aObj isKindOfClass:[NSArray class]] ? (NSArray *)aObj : nil;
                NSMutableString *tmpString = [[NSMutableString alloc] initWithString:@""];
                for (id candidate in audioResultData)
                {
                    [tmpString appendFormat:@"%@\r\n", candidate];
                }
                clientSampleViewController.resultView.text = nil;
                [clientSampleViewController logOutToManualResut:tmpString];
            }
            else
            {
                NSString *tmpString = [[BDVRSConfig sharedInstance] composeInputModeResult:aObj];
                [clientSampleViewController logOutToContinusManualResut:tmpString];
            }

            if (self.view.superview)
            {
                [self.view removeFromSuperview];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusReceiveData:
        {
            // This status is only used in input mode.
            // In input mode the result carries confidence values, for example:
            // [
            //     [
            //         {
            //             "百度" = "0.6055192947387695";
            //         },
            //         {
            //             "摆渡" = "0.3625582158565521";
            //         },
            //     ]
            //     [
            //         {
            //             "一下" = "0.7665404081344604";
            //         }
            //     ],
            // ]
            // Temporarily disabled -- otherwise it interferes with the navigation result.
            // NSString *tmpString = [[BDVRSConfig sharedInstance] composeInputModeResult:aObj];
            // [clientSampleViewController logOutToContinusManualResut:tmpString];
            break;
        }
        case EVoiceRecognitionClientWorkStatusEnd: // user finished speaking; waiting for the server's result
        {
            [self createRunLogWithStatus:aStatus];
            if ([BDVRSConfig sharedInstance].voiceLevelMeter)
            {
                [self freeVoiceLevelMeterTimerTimer];
            }
            [self createRecognitionView];
            break;
        }
        case EVoiceRecognitionClientWorkStatusCancel:
        {
            if ([BDVRSConfig sharedInstance].voiceLevelMeter)
            {
                [self freeVoiceLevelMeterTimerTimer];
            }
            [self createRunLogWithStatus:aStatus];
            if (self.view.superview)
            {
                [self.view removeFromSuperview];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusStartWorkIng: // the library started recognizing; the user may speak
        {
            if ([BDVRSConfig sharedInstance].playStartMusicSwitch) // if a start tone was played, prompt the user to speak now
            {
                [self createRecordView];
            }
            if ([BDVRSConfig sharedInstance].voiceLevelMeter) // start monitoring the voice level
            {
                [self startVoiceLevelMeterTimer];
            }
            [self createRunLogWithStatus:aStatus];
            break;
        }
        case EVoiceRecognitionClientWorkStatusNone:
        case EVoiceRecognitionClientWorkPlayStartTone:
        case EVoiceRecognitionClientWorkPlayStartToneFinish:
        case EVoiceRecognitionClientWorkStatusStart:
        case EVoiceRecognitionClientWorkPlayEndToneFinish:
        case EVoiceRecognitionClientWorkPlayEndTone:
        {
            // These statuses are only logged.
            [self createRunLogWithStatus:aStatus];
            break;
        }
        case EVoiceRecognitionClientWorkStatusNewRecordData:
        {
            break;
        }
        default:
        {
            // Any other status: log it, stop the level meter and dismiss the view.
            [self createRunLogWithStatus:aStatus];
            if ([BDVRSConfig sharedInstance].voiceLevelMeter)
            {
                [self freeVoiceLevelMeterTimerTimer];
            }
            if (self.view.superview)
            {
                [self.view removeFromSuperview];
            }
            break;
        }
    }
}
/**
 * Delegate callback: the recognizer's network status changed.
 *
 * For the network start and end statuses, the status is logged and the status-bar
 * network activity indicator is switched on (start) or off (end). Every other
 * status is ignored.
 *
 * @param aStatus The new network status.
 */
- (void)VoiceRecognitionClientNetWorkStatus:(int)aStatus
{
    BOOL networkStarted = (aStatus == EVoiceRecognitionClientNetWorkStatusStart);
    BOOL networkEnded = (aStatus == EVoiceRecognitionClientNetWorkStatusEnd);
    if (!networkStarted && !networkEnded)
    {
        return;
    }

    [self createRunLogWithStatus:aStatus];
    [UIApplication sharedApplication].networkActivityIndicatorVisible = networkStarted;
}
8.录音按钮的一些操作
#pragma mark - Hold-down button actions

/**
 * Private helper: stop the ripple animation and tear down the display link.
 * CADisplayLink retains its target, so it must be invalidated or `self` is kept alive.
 */
- (void)stopHoldDownAnimation
{
    [self.view.layer removeAllAnimations];
    [_disPlayLink invalidate];
    _disPlayLink = nil;
}

/**
 * The record button was pressed: start the ripple animation and voice recognition.
 * If recognition cannot be started, the animation is stopped again and the error is
 * reported through -firstStartError: after a short delay.
 */
- (void)holdDownButtonTouchDown
{
    // Start the animation. Invalidate any previous link first so that a repeated
    // touch-down cannot leave an orphaned display link firing forever.
    [_disPlayLink invalidate];
    _disPlayLink = [CADisplayLink displayLinkWithTarget:self selector:@selector(delayAnimation)];
    _disPlayLink.frameInterval = 40; // fire once every 40 frames
    [_disPlayLink addToRunLoop:[NSRunLoop currentRunLoop] forMode:NSDefaultRunLoopMode];

    self.isCancelled = NO;
    self.isRecording = NO;

    // Start voice recognition. VoiceRecognitionClientWorkStatus:obj: from the
    // MVoiceRecognitionClientDelegate protocol must be implemented before this call.
    int startStatus = [[BDVoiceRecognitionClient sharedInstance] startVoiceRecognition:self];
    if (startStatus != EVoiceRecognitionStartWorking) // starting failed: report the error
    {
        // Nothing is being recorded, so stop the animation instead of leaving the
        // display link running (and retaining self) after the early return.
        [self stopHoldDownAnimation];

        NSString *statusString = [NSString stringWithFormat:@"%d", startStatus];
        // Delay by 0.3 s so the view can be removed normally when an error occurs.
        [self performSelector:@selector(firstStartError:) withObject:statusString afterDelay:0.3];
        return;
    }

    // "Hold to talk - release to search" hint image.
    [voiceImageStr removeFromSuperview];
    voiceImageStr = [[UIImageView alloc] initWithFrame:CGRectMake(kScreenWidth/2 - 40, kScreenHeight - 153, 80, 33)];
    voiceImageStr.backgroundColor = [UIColor colorWithPatternImage:[UIImage imageNamed:@"searchVoice"]];
    [self.view addSubview:voiceImageStr];
}

/**
 * The finger left the screen outside the button: stop the animation, cancel
 * recognition and dismiss the view.
 */
- (void)holdDownButtonTouchUpOutside
{
    // Stop the animation.
    [self stopHoldDownAnimation];

    // Cancel recording.
    [[BDVoiceRecognitionClient sharedInstance] stopVoiceRecognition];

    if (self.view.superview)
    {
        [self.view removeFromSuperview];
    }
}

/**
 * The finger left the screen inside the button: stop the animation and tell the
 * recognizer that the user has finished speaking.
 */
- (void)holdDownButtonTouchUpInside
{
    // Stop the animation.
    [self stopHoldDownAnimation];

    [[BDVoiceRecognitionClient sharedInstance] speakFinish];
}

/**
 * The finger slid outside the button.
 */
- (void)holdDownDragOutside
{
    // Only when recording has already started does the drag-out action need handling;
    // otherwise just set isCancelled so that recording does not start.
    if (self.isRecording)
    {
        // if ([self.delegate respondsToSelector:@selector(didDragOutsideAction)]) {
        //     [self.delegate didDragOutsideAction];
        // }
    }
    else
    {
        self.isCancelled = YES;
    }
}

#pragma mark - layout subViews UI

/**
 * Create a 72x72 button positioned at the bottom centre of the screen.
 *
 * @param image   Background image for the normal state (skipped when nil).
 * @param hlImage Background image for the highlighted state (skipped when nil).
 *
 * @return The new button.
 */
- (UIButton *)createButtonWithImage:(UIImage *)image HLImage:(UIImage *)hlImage
{
    UIButton *button = [[UIButton alloc] initWithFrame:CGRectMake(kScreenWidth/2 - 36, kScreenHeight - 120, 72, 72)];
    if (image)
    {
        [button setBackgroundImage:image forState:UIControlStateNormal];
    }
    if (hlImage)
    {
        [button setBackgroundImage:hlImage forState:UIControlStateHighlighted];
    }
    return button;
}

#pragma mark - Animation

/**
 * Add one randomly coloured circular layer that scales from 0 to 1 while fading
 * out, then schedule its removal.
 */
- (void)startAnimation
{
    CGSize screenSize = [UIScreen mainScreen].bounds.size;

    CALayer *layer = [[CALayer alloc] init];
    layer.cornerRadius = screenSize.width / 2;
    layer.frame = CGRectMake(0, 0, layer.cornerRadius * 2, layer.cornerRadius * 2);
    layer.position = CGPointMake(screenSize.width / 2, screenSize.height - 84); // self.view.layer.position;
    UIColor *color = [UIColor colorWithRed:arc4random()%10*0.1
                                     green:arc4random()%10*0.1
                                      blue:arc4random()%10*0.1
                                     alpha:1];
    layer.backgroundColor = color.CGColor;
    [self.view.layer addSublayer:layer];

    CAMediaTimingFunction *defaultCurve = [CAMediaTimingFunction functionWithName:kCAMediaTimingFunctionDefault];

    _animaTionGroup = [CAAnimationGroup animation];
    _animaTionGroup.delegate = self;
    _animaTionGroup.duration = 2;
    _animaTionGroup.removedOnCompletion = YES;
    _animaTionGroup.timingFunction = defaultCurve;

    CABasicAnimation *scaleAnimation = [CABasicAnimation animationWithKeyPath:@"transform.scale.xy"];
    scaleAnimation.fromValue = @0.0;
    scaleAnimation.toValue = @1.0;
    scaleAnimation.duration = 2;

    CAKeyframeAnimation *opencityAnimation = [CAKeyframeAnimation animationWithKeyPath:@"opacity"];
    opencityAnimation.duration = 2;
    opencityAnimation.values = @[@0.8, @0.4, @0];
    opencityAnimation.keyTimes = @[@0, @0.5, @1];
    opencityAnimation.removedOnCompletion = YES;

    _animaTionGroup.animations = @[scaleAnimation, opencityAnimation];
    [layer addAnimation:_animaTionGroup forKey:nil];

    // The layer is removed after 1.5 s, i.e. before the 2 s animation has run to completion.
    [self performSelector:@selector(removeLayer:) withObject:layer afterDelay:1.5];
}

/**
 * Remove a ripple layer from its superlayer.
 */
- (void)removeLayer:(CALayer *)layer
{
    [layer removeFromSuperlayer];
}

/**
 * Display-link callback: emit one more ripple.
 */
- (void)delayAnimation
{
    [self startAnimation];
}
完成以上操作,就大功告成了!
温馨提示:
1.因为是语音识别,需要用到麦克风相关权限,在模拟器上会报12个错误,使用真机可以解决;
2.授权文件相关的配置并不复杂,工程的Bundle Identifier只需要设置成与百度离线授权一致即可,如下图:
最终效果如下:
有不懂或不明白的地方可以微博联系我:
iOS开发人员交流群:446310206