iOS语音书写功能(语音转文本)
最近在项目开发中,需要实现将语音识别并转换成文本的功能。研究了一下科大讯飞的SDK,附上Demo分享给大家。
研发前先得做一些准备。
1、注册科大讯飞开发者帐号(http://www.xfyun.cn)
2、下载开发平台(iOS、或Android,或其他)所需要的SDK(SDK包含:说明文档、SDK即iflyMSC.framework、Demo)
3、项目中添加SDK(添加时,先将SDK复制粘贴到项目文件夹,再通过add framework的方法添加到项目引用),及相关联的framework
添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或选择对应的framework-add
4、使用时要添加对应的头文件
特别说明:
1、使用SDK关联的APPID存在于下载的Demo中,如果SDK有替换的话APPID应该跟着一起替换。
2、添加其他framework:
libz.tbd
libc++.tbd
CoreGraphics.framework
QuartzCore.framework
AddressBook.framework
CoreLocation.framework
UIKit.framework
AudioToolbox.framework
Foundation.framework
SystemConfiguration.framework
AVFoundation.framework
CoreTelephony.framework
3、Bitcode属性设置为NO(TARGETS-Build Settings-Build Options-Enable Bitcode-NO)
4、在使用前,务必在AppDelegate的方法中"
- (BOOL)application:(UIApplication*)application didFinishLaunchingWithOptions:(NSDictionary*)launchOptions {}"进行初始化操作。
5、需要有网络的情况下才能使用。
如图
下载的科大讯飞SDK文件
Demo中的APPID
添加SDK,及添加关联framework
设置Bitcode属性为 NO
语音转文本实现代码
- .h文件
- #import <Foundation/Foundation.h>
- //导入头文件
- #import"iflyMSC.framework/Headers/IFlyMSC.h"
- #import"iflyMSC.framework/Headers/IFlySpeechUtility.h"
- #import"iflyMSC/IFlySpeechConstant.h"
#pragma mark - Recognition parameter configuration

/**************************************************************************/

/**
 Holds the string-valued settings that are pushed into the speech recognizer
 before a dictation session starts, plus class-level accessors for the
 well-known language, accent, sample-rate and punctuation values.
 */
@interface IATConfig : NSObject

/// Returns the shared configuration object (created once, on first use).
+ (IATConfig *)sharedInstance;

/// Accent identifier for Mandarin ("mandarin").
+ (NSString *)mandarin;
/// Accent identifier for Cantonese ("cantonese").
+ (NSString *)cantonese;
/// Accent identifier for the Henan dialect ("henanese").
+ (NSString *)henanese;
/// Language identifier for Chinese ("zh_cn").
+ (NSString *)chinese;
/// Language identifier for English ("en_us").
+ (NSString *)english;
/// The low sample rate value ("8000").
+ (NSString *)lowSampleRate;
/// The high sample rate value ("16000").
+ (NSString *)highSampleRate;
/// Punctuation switch value meaning "return punctuation" ("1").
+ (NSString *)isDot;
/// Punctuation switch value meaning "no punctuation" ("0").
+ (NSString *)noDot;

/**
 The following values only take effect once they are handed to the
 recognizer (IFlySpeechRecognizer) as parameters.
 They are declared `copy` so that a caller passing a mutable string cannot
 change the stored value afterwards.
 */
/// Maximum recording duration (unit presumably milliseconds; confirm against the SDK docs).
@property (nonatomic, copy) NSString *speechTimeout;
/// End-of-speech (trailing silence) endpoint value.
@property (nonatomic, copy) NSString *vadEos;
/// Begin-of-speech (leading silence) endpoint value.
@property (nonatomic, copy) NSString *vadBos;
/// Recognition language, e.g. +chinese or +english.
@property (nonatomic, copy) NSString *language;
/// Recognition accent, e.g. +mandarin; only applied when the language is Chinese.
@property (nonatomic, copy) NSString *accent;
/// Whether punctuation is returned, see +isDot / +noDot.
@property (nonatomic, copy) NSString *dot;
/// Audio sample rate, see +lowSampleRate / +highSampleRate.
@property (nonatomic, copy) NSString *sampleRate;

/**
 The following values do not need to be configured by callers.
 */
/// Whether the recognizer is shown with a built-in view; defaults to NO.
@property (nonatomic, assign) BOOL haveView;
/// Accent identifiers (not populated by the default settings).
@property (nonatomic, copy) NSArray *accentIdentifer;
/// Display names of the supported accents.
@property (nonatomic, copy) NSArray *accentNickName;

@end
/**************************************************************************/

#pragma mark - Speech dictation

/**
 Block-based wrapper around the iFlytek speech recognizer: start a dictation
 session, receive progress through the supplied blocks, and stop or cancel it.
 */
@interface VoiceConversion : NSObject

/// Initializes the speech SDK (logging, working path, app id).
/// Call once at app launch, before any session is started.
+ (void)VoiceInitialize;

/**
 Starts recording and recognition.

 @param startListening Called right away with YES when the recognizer started
                       listening, NO otherwise. May be nil.
 @param begin          Called when the recognizer reports the beginning of speech.
 @param end            Called when the recognizer reports the end of speech.
 @param error          Called when the session ends; isSuccess is YES when the
                       reported error code is 0.
 @param result         Called with the accumulated recognized text once the
                       final result has arrived.
 @param volume         Called with the current input volume while recording.
 */
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume;

/// Cancels the current session.
- (void)voiceCancel;

/// Stops recording.
- (void)voiceStop;

@end
- .m文件
- #import"VoiceConversion.h"
#pragma mark - Recognition parameter configuration

/**************************************************************************/

// Accent identifiers understood by the recognizer.
static NSString *const PUTONGHUA = @"mandarin";
static NSString *const YUEYU = @"cantonese";
static NSString *const HENANHUA = @"henanese";
// Language identifiers understood by the recognizer.
static NSString *const ENGLISH = @"en_us";
static NSString *const CHINESE = @"zh_cn";

@implementation IATConfig

/// Creates a configuration populated with the default settings.
- (instancetype)init {
    self = [super init];
    if (self) {
        [self defaultSetting];
    }
    // `self` is already nil when the superclass initializer failed.
    return self;
}

/// Returns the shared configuration object, creating it exactly once.
+ (IATConfig *)sharedInstance {
    static IATConfig *instance = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        instance = [[IATConfig alloc] init];
    });
    return instance;
}

/// Resets every setting to its default value.
- (void)defaultSetting {
    _speechTimeout = @"30000";
    _vadEos = @"3000";
    _vadBos = @"3000";
    _dot = @"1";
    _sampleRate = @"16000";
    _language = CHINESE;
    _accent = PUTONGHUA;
    _haveView = NO; // no built-in recognizer view by default
    _accentNickName = @[@"粤语", @"普通话", @"河南话", @"英文"];
}

#pragma mark Well-known parameter values

+ (NSString *)mandarin {
    return PUTONGHUA;
}

+ (NSString *)cantonese {
    return YUEYU;
}

+ (NSString *)henanese {
    return HENANHUA;
}

+ (NSString *)chinese {
    return CHINESE;
}

+ (NSString *)english {
    return ENGLISH;
}

+ (NSString *)lowSampleRate {
    return @"8000";
}

+ (NSString *)highSampleRate {
    return @"16000";
}

+ (NSString *)isDot {
    return @"1";
}

+ (NSString *)noDot {
    return @"0";
}

@end
/**************************************************************************/

#pragma mark - Speech dictation

// App id sent to the SDK at start-up. It comes from the downloaded demo and
// must be replaced together with the SDK whenever the SDK is replaced.
static NSString *const VoiceAPPID = @"572016e4";
// Network time-out value handed to the recognizer.
static NSString *const VoiceTimeOut = @"20000";

// Adopts the recognizer delegate protocol: -initializeVoice assigns `self`
// as the recognizer's delegate.
@interface VoiceConversion () <IFlySpeechRecognizerDelegate>

/// Text accumulated from the partial results of the current session.
@property (nonatomic, strong) NSMutableString *resultText;
/// The shared recognizer, configured lazily on first access.
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;

// Callbacks supplied by the caller of -voiceStart:...
@property (nonatomic, copy) void (^beginSpeech)(void);
@property (nonatomic, copy) void (^endSpeech)(void);
@property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
@property (nonatomic, copy) void (^resultSpeech)(NSString *text);
@property (nonatomic, copy) void (^volumeSpeech)(int volume);

@end

@implementation VoiceConversion

#pragma mark - SDK initialization

/// Initializes the speech SDK: log level, console logging, working path and app id.
+ (void)VoiceInitialize {
    // Log level of the SDK; the log is stored in the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the SDK log to the console.
    [IFlySetting showLogcat:YES];

    // Use the caches directory as the SDK working path.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = [paths objectAtIndex:0];
    [IFlySetting setLogFilePath:cachePath];

    // The app id identifies the application and must be passed at start-up.
    // Initialization is asynchronous, so run it at app launch (see the demo's
    // MSCAppDelegate.m). Using the service before initialization usually
    // yields error code 10111.
    NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:initString];
}

#pragma mark - Lifecycle and lazy properties

- (void)dealloc {
    // Use the ivar rather than the lazy getter: the getter would fetch and
    // reconfigure the shared recognizer during teardown even if this object
    // never started a session.
    IFlySpeechRecognizer *recognizer = _iFlySpeechRecognizer;
    if (recognizer == nil) {
        return;
    }
    // Only touch the shared recognizer while this object is still its delegate.
    // Clearing the delegate keeps the recognizer from calling back into a
    // deallocated object.
    // NOTE(review): assumes the SDK does not hold its delegate weakly - confirm.
    if ((id)recognizer.delegate == (id)self) {
        recognizer.delegate = nil;
        [recognizer cancel];
    }
}

- (NSMutableString *)resultText {
    if (!_resultText) {
        _resultText = [[NSMutableString alloc] init];
    }
    return _resultText;
}

- (IFlySpeechRecognizer *)iFlySpeechRecognizer {
    if (_iFlySpeechRecognizer == nil) {
        _iFlySpeechRecognizer = [IFlySpeechRecognizer sharedInstance];
        [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];
        // Dictation ("iat") mode.
        [_iFlySpeechRecognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
    }
    return _iFlySpeechRecognizer;
}

/// Makes this object the recognizer's delegate and pushes the shared
/// IATConfig settings into the recognizer.
- (void)initializeVoice {
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    recognizer.delegate = self;

    IATConfig *config = [IATConfig sharedInstance];

    // Maximum recording duration.
    [recognizer setParameter:config.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    // End-of-speech endpoint.
    [recognizer setParameter:config.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
    // Begin-of-speech endpoint.
    [recognizer setParameter:config.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
    // Network time-out.
    [recognizer setParameter:VoiceTimeOut forKey:[IFlySpeechConstant NET_TIMEOUT]];
    // Sample rate; 16K is recommended.
    [recognizer setParameter:config.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];

    if ([config.language isEqualToString:[IATConfig chinese]]) {
        // Language plus dialect/accent.
        [recognizer setParameter:config.language forKey:[IFlySpeechConstant LANGUAGE]];
        [recognizer setParameter:config.accent forKey:[IFlySpeechConstant ACCENT]];
    } else if ([config.language isEqualToString:[IATConfig english]]) {
        // English has no accent setting.
        [recognizer setParameter:config.language forKey:[IFlySpeechConstant LANGUAGE]];
    }

    // Whether punctuation is returned.
    [recognizer setParameter:config.dot forKey:[IFlySpeechConstant ASR_PTT]];
}

#pragma mark - Dictation control

/**
 Starts recording and recognition.

 Any text accumulated by a previous session is discarded, the callbacks are
 stored, the recognizer is reconfigured and a pending session is cancelled
 before listening starts.

 @param startListening Called with the recognizer's start result; NO may mean
                       the previous request has not finished yet (concurrent
                       sessions are not supported). May be nil.
 @param begin          Called at the beginning of speech.
 @param end            Called at the end of speech.
 @param error          Called when the session ends; isSuccess is YES for error code 0.
 @param result         Called with the full recognized text after the last result.
 @param volume         Called with the current input volume.
 */
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume {
    [self.resultText setString:@""];

    // The properties are declared `copy`, so no explicit copy is needed here.
    self.beginSpeech = begin;
    self.endSpeech = end;
    self.errorSpeech = error;
    self.resultSpeech = result;
    self.volumeSpeech = volume;

    [self initializeVoice];

    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    [recognizer cancel];

    // Take audio from the microphone.
    [recognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];
    // Deliver results as JSON.
    [recognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];
    // Keep the recording; it is stored in the SDK working path, or in
    // Library/Caches when no working path has been set.
    [recognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];

    BOOL isStart = [recognizer startListening];
    if (startListening) {
        startListening(isStart);
    }
}

/// Cancels the current dictation session.
- (void)voiceCancel {
    [self.iFlySpeechRecognizer cancel];
}

/// Stops recording.
- (void)voiceStop {
    [self.iFlySpeechRecognizer stopListening];
}

#pragma mark - IFlySpeechRecognizerDelegate

/**
 Recognition result callback.

 The keys of the first element of `results` are concatenated, parsed as
 dictation JSON and appended to the accumulated text. On the last result a
 trailing full stop is removed, the text is delivered to the result callback
 and the session is cancelled.

 @param results Recognition result; may be empty.
 @param isLast  YES when this is the final result of the session.
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast {
    NSMutableString *json = [[NSMutableString alloc] init];
    // -firstObject instead of results[0]: an empty array must not raise.
    NSDictionary *fragments = [results firstObject];
    for (NSString *key in fragments) {
        [json appendFormat:@"%@", key];
    }

    NSString *chunk = [[self class] stringFromJson:json];
    if (chunk.length > 0) {
        [self.resultText appendString:chunk];
    }

    // Keep the session alive until the final result: cancelling after a
    // partial result would drop the remaining ones, and the text would never
    // reach the result callback.
    if (!isLast) {
        return;
    }

    // Drop the closing full stop only when it really is the last character;
    // a full stop in the middle of the text must not truncate what follows.
    NSString *fullStop = @"。";
    if ([self.resultText hasSuffix:fullStop]) {
        NSUInteger start = self.resultText.length - fullStop.length;
        [self.resultText deleteCharactersInRange:NSMakeRange(start, fullStop.length)];
    }

    if (self.resultSpeech) {
        // Hand out an immutable snapshot, not the internal mutable buffer.
        self.resultSpeech([self.resultText copy]);
    }

    [self voiceCancel];
}

/**
 Session end callback.

 @param error Error object; errorCode 0 means the session ended normally,
              any other value means an error occurred.
 */
- (void)onError:(IFlySpeechError *)error {
    if (self.errorSpeech) {
        self.errorSpeech(0 == error.errorCode);
    }
}

/// Recording stopped.
- (void)onEndOfSpeech {
    if (self.endSpeech) {
        self.endSpeech();
    }
}

/// Recognition started.
- (void)onBeginOfSpeech {
    if (self.beginSpeech) {
        self.beginSpeech();
    }
}

/// Volume callback; volume is in the range 0-30.
- (void)onVolumeChanged:(int)volume {
    if (self.volumeSpeech) {
        self.volumeSpeech(volume);
    }
}

#pragma mark - Result parsing

/**************************************************************************/

/**
 Parses a command-word recognition result.

 Each line that contains "confidence=", "grammar=" and "input=" contributes
 one "<input>置信度<confidence>" line to the output. A line whose "id=" value
 is "nomatch" makes the whole result an empty string. Lines whose fields are
 not in the expected order are skipped instead of raising a range exception.

 @param params Raw result text, one entry per line.
 @return The formatted result; empty when nothing matched.
 */
+ (NSString *)stringFromAsr:(NSString *)params {
    NSMutableString *resultString = [[NSMutableString alloc] init];
    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];

    for (NSString *line in [params componentsSeparatedByString:@"\n"]) {
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@"grammar="];
        NSRange inputRange = [line rangeOfString:@"input="];

        if (confidenceRange.location == NSNotFound ||
            grammarRange.location == NSNotFound ||
            inputRange.location == NSNotFound) {
            continue;
        }

        // "nomatch" check: the id value sits between "id=" and "name=".
        // Only evaluated when "name=" really follows "id="; otherwise the
        // unsigned length computation would underflow.
        if (idRange.location != NSNotFound && nameRange.location != NSNotFound) {
            NSUInteger idStart = NSMaxRange(idRange);
            if (nameRange.location >= idStart) {
                NSRange valueRange = NSMakeRange(idStart, nameRange.location - idStart);
                NSString *idValue = [[line substringWithRange:valueRange]
                    stringByTrimmingCharactersInSet:blanks];
                if ([idValue isEqualToString:@"nomatch"]) {
                    return @"";
                }
            }
        }

        // The confidence value sits between "confidence=" and "grammar=".
        NSUInteger confidenceStart = NSMaxRange(confidenceRange);
        if (grammarRange.location < confidenceStart) {
            continue; // fields out of order - malformed line
        }
        NSString *score = [line substringWithRange:
            NSMakeRange(confidenceStart, grammarRange.location - confidenceStart)];

        // The input text runs from "input=" to the end of the line.
        NSString *inputString = [line substringFromIndex:NSMaxRange(inputRange)];

        [resultString appendFormat:@"%@置信度%@\n", inputString, score];
    }

    return resultString;
}

/**
 Collects every candidate-word dictionary of a recognition JSON document,
 i.e. each element of each "cw" array of the top-level "ws" array, in order.
 Anything that does not have the expected shape is skipped.

 @param params JSON text.
 @return The candidate-word dictionaries; empty when the text cannot be parsed.
 */
+ (NSArray *)candidateWordsFromJson:(NSString *)params {
    NSMutableArray *words = [NSMutableArray array];

    // The text must be UTF-8; a nil data object would make the parser raise.
    NSData *data = [params dataUsingEncoding:NSUTF8StringEncoding];
    if (data == nil) {
        return words;
    }

    id root = [NSJSONSerialization JSONObjectWithData:data options:kNilOptions error:nil];
    if (![root isKindOfClass:[NSDictionary class]]) {
        return words;
    }

    id segments = [(NSDictionary *)root objectForKey:@"ws"];
    if (![segments isKindOfClass:[NSArray class]]) {
        return words;
    }

    for (id segment in (NSArray *)segments) {
        if (![segment isKindOfClass:[NSDictionary class]]) {
            continue;
        }
        id candidates = [(NSDictionary *)segment objectForKey:@"cw"];
        if (![candidates isKindOfClass:[NSArray class]]) {
            continue;
        }
        for (id candidate in (NSArray *)candidates) {
            if ([candidate isKindOfClass:[NSDictionary class]]) {
                [words addObject:candidate];
            }
        }
    }
    return words;
}

/**
 Parses dictation JSON and returns the concatenated words.

 Example input:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"尽","sc":0}]},{"bg":0,"cw":[{"w":"黄河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}

 @param params JSON text; nil yields nil.
 @return The words in order; empty when the JSON cannot be parsed.
 */
+ (NSString *)stringFromJson:(NSString *)params {
    if (params == nil) {
        return nil;
    }

    NSMutableString *text = [[NSMutableString alloc] init];
    for (NSDictionary *word in [self candidateWordsFromJson:params]) {
        id value = [word objectForKey:@"w"];
        // Appending nil or a non-string would raise.
        if ([value isKindOfClass:[NSString class]]) {
            [text appendString:value];
        }
    }
    return text;
}

/**
 Parses a grammar (ABNF) recognition result: one line per candidate word in
 the form "<word>置信度:<score>".

 @param params JSON text; nil yields nil.
 @return The formatted lines; empty when the JSON cannot be parsed.
 */
+ (NSString *)stringFromABNFJson:(NSString *)params {
    if (params == nil) {
        return nil;
    }

    NSMutableString *text = [[NSMutableString alloc] init];
    for (NSDictionary *word in [self candidateWordsFromJson:params]) {
        id value = [word objectForKey:@"w"];
        if ([value isKindOfClass:[NSString class]]) {
            [text appendString:value];
        }
        // "sc" is formatted with %@, so its JSON type does not matter.
        [text appendFormat:@"置信度:%@\n", [word objectForKey:@"sc"]];
    }
    return text;
}

/**************************************************************************/

@end
使用
- 初始化方法
/// Initializes the speech SDK: log level, console logging, working path and app id.
+ (void)VoiceInitialize {
    // Log level of the SDK; the log is stored in the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the SDK log to the console.
    [IFlySetting showLogcat:YES];

    // Use the caches directory as the SDK working path.
    NSArray *cacheDirectories =
        NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    [IFlySetting setLogFilePath:[cacheDirectories objectAtIndex:0]];

    // The app id identifies the application and must be passed at start-up.
    // Initialization is asynchronous, so run it at app launch (see the demo's
    // MSCAppDelegate.m). Using the service before initialization usually
    // yields error code 10111.
    NSString *launchParameters = [NSString stringWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:launchParameters];
}
- 初始化调用
/// App launch hook: initializes the speech SDK before any recognizer is used.
- (BOOL)application:(UIApplication *)application
    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    // Override point for customization after application launch.
    [VoiceConversion VoiceInitialize];

    return YES;
}
- #import"VoiceConversion.h"
@interface ViewController ()

/// Lazily created speech wrapper used by the three bar button actions.
@property (nonatomic, strong) VoiceConversion *voiceConversion;
/// Shows the session state, the input volume and the recognized text.
@property (nonatomic, strong) UILabel *messageLabel;

@end

@implementation ViewController

#pragma mark - Lifecycle

- (void)viewDidLoad {
    [super viewDidLoad];

    // Three navigation bar buttons drive the dictation session.
    self.navigationItem.rightBarButtonItems = @[
        [self barItemWithTitle:@"start" action:@selector(startItemClick:)],
        [self barItemWithTitle:@"stop" action:@selector(stopItemClick:)],
        [self barItemWithTitle:@"cancel" action:@selector(cancelItemClick:)],
    ];
    self.title = @"科大讯飞语音";

    [self setUI];
}

- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

#pragma mark - Views

/// Builds a "done"-style bar button item that sends `action` to this controller.
- (UIBarButtonItem *)barItemWithTitle:(NSString *)title action:(SEL)action {
    return [[UIBarButtonItem alloc] initWithTitle:title
                                            style:UIBarButtonItemStyleDone
                                           target:self
                                           action:action];
}

/// Creates the message label and adds it to the view.
- (void)setUI {
    if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)]) {
        [self setEdgesForExtendedLayout:UIRectEdgeNone];
    }

    CGFloat margin = 10.0;
    CGRect labelFrame = CGRectMake(margin,
                                   margin,
                                   CGRectGetWidth(self.view.bounds) - margin * 2,
                                   40.0);
    UILabel *label = [[UILabel alloc] initWithFrame:labelFrame];
    label.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];
    label.textAlignment = NSTextAlignmentCenter;

    self.messageLabel = label;
    [self.view addSubview:label];
}

#pragma mark - Actions

/// Starts a dictation session and mirrors its progress in the label.
- (void)startItemClick:(UIBarButtonItem *)item {
    // The blocks are stored by the speech wrapper, which this controller
    // owns, so they must not capture the controller strongly.
    __weak ViewController *weakSelf = self;

    [self.voiceConversion voiceStart:^(BOOL isStart) {
        NSLog(@"1start");
        weakSelf.messageLabel.text = isStart ? @"正在录音" : @"启动识别服务失败,请稍后重试";
    } speechBegin:^{
        NSLog(@"2begin");
    } speechEnd:^{
        NSLog(@"3end");
    } speechError:^(BOOL isSuccess) {
        NSLog(@"4error");
    } speechResult:^(NSString *text) {
        NSLog(@"5result");
        weakSelf.messageLabel.text = text;
    } speechVolume:^(int volume) {
        NSLog(@"6volume");
        weakSelf.messageLabel.text = [NSString stringWithFormat:@"音量:%d", volume];
    }];
}

/// Stops recording.
- (void)stopItemClick:(UIBarButtonItem *)item {
    [self.voiceConversion voiceStop];
    self.messageLabel.text = @"停止录音";
}

/// Cancels the session.
- (void)cancelItemClick:(UIBarButtonItem *)item {
    [self.voiceConversion voiceCancel];
    self.messageLabel.text = @"取消识别";
}

#pragma mark - Getter

- (VoiceConversion *)voiceConversion {
    if (_voiceConversion == nil) {
        _voiceConversion = [[VoiceConversion alloc] init];
    }
    return _voiceConversion;
}

@end