与众不同 windows phone (45) - 8.0 语音: TTS, 语音识别, 语音命令

作者：webabcd

介绍
与众不同 windows phone 8.0 之语音

TTS（Text To Speech）
语音识别
语音命令

示例
1、演示 TTS（Text To Speech）的应用
Speech/TTS.xaml

<!-- Page markup for the text-to-speech demo; the click handlers named below live in the code-behind class Demo.Speech.TTS. -->
<phone:PhoneApplicationPage
    x:Class="Demo.Speech.TTS"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait" Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Output label whose text is set from the code-behind -->
            <TextBlock Name="lblMsg" />
            
            <!-- Speaks the sample text with the default voice -->
            <Button x:Name="btnTTS_Basic" Content="TTS 基础" Click="btnTTS_Basic_Click" />

            <!-- Speaks the sample text with an explicitly selected voice -->
            <Button x:Name="btnTTS_Select" Content="用指定的语音 TTS" Click="btnTTS_Select_Click" />

            <!-- Speaks an SSML document -->
            <Button x:Name="btnTTS_SSML" Content="朗读 SSML 文档" Click="btnTTS_SSML_Click" />

        </StackPanel>
    </Grid>

</phone:PhoneApplicationPage>

Speech/TTS.xaml.cs

/*
 * 演示 TTS（Text To Speech）的应用
 * 
 * 
 * InstalledVoices - 管理已安装的语音
 *     All - 已安装的全部语音，返回 VoiceInformation 对象列表
 *     Default - 默认语音，返回 VoiceInformation 对象
 *     
 * VoiceInformation - 语音信息
 *     Id - 标识
 *     Language - 语言
 *     DisplayName - 名称
 *     Description - 描述
 *     Gender - 性别（VoiceGender.Male 或 VoiceGender.Female）
 * 
 * SpeechSynthesizer - TTS 的类
 *     SetVoice(VoiceInformation voiceInformation) - 设置语音
 *     GetVoice() - 获取语音信息
 *     SpeakTextAsync(string content, object userState) - 朗读指定的文本。可以设置一个上下文对象，在 SpeechStarted 时取出
 *     SpeakSsmlAsync(string content, object userState) - 朗读指定的 SSML 文档。可以设置一个上下文对象，在 SpeechStarted 时取出
 *     SpeakSsmlFromUriAsync(Uri content, object userState) - 朗读指定地址的 SSML 文档。可以设置一个上下文对象，在 SpeechStarted 时取出
 *     CancelAll() - 取消全部朗读
 *     SpeechStarted - 开始朗读时触发的事件
 *     BookmarkReached - 朗读到 <mark /> 标记时触发的事件（仅针对 SSML 协议）
 * 
 * 
 * 注：
 * 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" />
 * 2、SSML - Speech Synthesis Markup Language
 * 3、微软关于 ssml 的说明：http://msdn.microsoft.com/en-us/library/hh361578
 * 4、W3C 关于 ssml 的说明：http://www.w3.org/TR/speech-synthesis/
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Synthesis;

namespace Demo.Speech
{
    /// <summary>
    /// Demo page showing three ways to use text-to-speech: the default voice,
    /// an explicitly selected zh-CN voice, and an SSML document.
    /// </summary>
    public partial class TTS : PhoneApplicationPage
    {
        // Sample sentence shared by all three demos. It embeds a <mark /> element so that
        // the SSML demo raises BookmarkReached.
        // NOTE(review): the plain-text demos pass this same string, markup included;
        // confirm how the engine treats the tag when it is not inside an SSML document.
        private string _text = "TTS 是 Text To Speech 的缩写<mark name=\"xxx\" />，即“从文本到语音”，是人机对话的一部分，让机器能够说话。";

        public TTS()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Speaks the sample text with the default voice.
        /// </summary>
        private async void btnTTS_Basic_Click(object sender, RoutedEventArgs e)
        {
            SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

            try
            {
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                // An exception escaping an async void handler is unhandled, so report it here.
                ShowError(ex);
            }
        }

        /// <summary>
        /// Speaks the sample text with the first installed zh-CN voice, or reports
        /// that no such voice is installed.
        /// </summary>
        private async void btnTTS_Select_Click(object sender, RoutedEventArgs e)
        {
            SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

            // First installed Chinese voice (details such as gender are available on VoiceInformation).
            // FirstOrDefault avoids the ArgumentOutOfRangeException that ElementAt(0) throws
            // when no zh-CN voice is installed.
            VoiceInformation zhVoice = InstalledVoices.All.FirstOrDefault(voice => voice.Language == "zh-CN");
            if (zhVoice == null)
            {
                lblMsg.Text = "未安装中文语音";
                return;
            }

            try
            {
                // Select the voice, then speak.
                speechSynthesizer.SetVoice(zhVoice);
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                ShowError(ex);
            }
        }

        /// <summary>
        /// Wraps the sample text in an SSML document (zh-CN, male voice, rate -50%) and speaks it.
        /// </summary>
        private async void btnTTS_SSML_Click(object sender, RoutedEventArgs e)
        {
            SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

            // Raised when speaking starts.
            speechSynthesizer.SpeechStarted += speechSynthesizer_SpeechStarted;

            // Raised when a <mark /> element is reached.
            speechSynthesizer.BookmarkReached += speechSynthesizer_BookmarkReached;

            // Microsoft SSML reference: http://msdn.microsoft.com/en-us/library/hh361578
            // W3C SSML specification: http://www.w3.org/TR/speech-synthesis/

            string ssml = "<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"zh-CN\">"; // Chinese
            ssml += "<voice gender=\"male\">"; // male voice
            ssml += "<prosody rate=\"-50%\">"; // speaking rate -50%
            ssml += _text;
            ssml += "</prosody>";
            ssml += "</voice>";
            ssml += "</speak>";

            try
            {
                // Speak the SSML document.
                await speechSynthesizer.SpeakSsmlAsync(ssml);
            }
            catch (Exception ex)
            {
                ShowError(ex);
            }
            finally
            {
                // Detach the page's handlers from this one-shot synthesizer.
                speechSynthesizer.SpeechStarted -= speechSynthesizer_SpeechStarted;
                speechSynthesizer.BookmarkReached -= speechSynthesizer_BookmarkReached;
            }
        }

        /// <summary>
        /// Demonstrates reading the user-state object from the SpeechStarted event arguments.
        /// </summary>
        void speechSynthesizer_SpeechStarted(SpeechSynthesizer sender, SpeechStartedEventArgs args)
        {
            // Read the context object from the event arguments (demo only; the value is not used).
            object userState = args.UserState;
        }

        /// <summary>
        /// Shows the name of the reached <mark /> element and the audio position at which it was reached.
        /// </summary>
        void speechSynthesizer_BookmarkReached(SpeechSynthesizer sender, SpeechBookmarkReachedEventArgs args)
        {
            // The label is updated through the page Dispatcher.
            this.Dispatcher.BeginInvoke(delegate()
            {
                lblMsg.Text = "mark name: " + args.Bookmark
                    + Environment.NewLine
                    + "audio position: " + args.AudioPosition.TotalSeconds;
            });
        }

        /// <summary>
        /// Writes the exception details to the output label.
        /// </summary>
        private void ShowError(Exception ex)
        {
            lblMsg.Text = ex.ToString();
        }
    }
}

2、演示如何通过自定义语法列表做语音识别，以及如何通过 SRGS 自定义语法做语音识别
Speech/SRGSGrammar.xml

<?xml version="1.0" encoding="utf-8"?>
<!-- SRGS grammar (xml:lang zh-cn) whose root rule is "Main". -->
<grammar version="1.0" xml:lang="zh-cn" root="Main" tag-format="semantics/1.0"
         xmlns="http://www.w3.org/2001/06/grammar"
         xmlns:sapi="http://schemas.microsoft.com/Speech/2002/06/SRGSExtensions">
  <!-- Root rule: an optional "我想去" (repeat 0-1) followed by one entry of the Cities rule -->
  <rule id="Main">
    <item repeat="0-1">我想去</item>
    <ruleref uri="#Cities" />
  </rule>
  <!-- City names; exactly one of them must be spoken -->
  <rule id="Cities" scope="public">
    <one-of>
      <item>北京</item>
      <item>深圳</item>
      <item>上海</item>
      <item>广州</item>
    </one-of>
  </rule>
</grammar>

<!--
This grammar recognizes: 我想去北京; 我想去深圳; 我想去上海; 我想去广州; 北京; 深圳; 上海; 广州

Visual Studio provides an item template for creating SRGSGrammar (SRGS grammar) files
Microsoft SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
W3C SRGS specification: http://www.w3.org/TR/speech-grammar/
-->

Speech/SpeechRecognition.xaml

<!-- Page markup for the speech recognition demo; the click handlers named below live in the code-behind class Demo.Speech.SpeechRecognition. -->
<phone:PhoneApplicationPage
    x:Class="Demo.Speech.SpeechRecognition"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait" Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Output label whose text is set from the code-behind -->
            <TextBlock Name="lblMsg" />

            <!-- Recognition against a custom phrase list -->
            <Button x:Name="btnDemo" Content="通过自定义语法列表做语音识别" Click="btnDemo_Click" />

            <!-- Recognition against a custom SRGS grammar -->
            <Button x:Name="btnSRGS" Content="通过 SRGS 自定义语法做语音识别" Click="btnSRGS_Click" />
            
        </StackPanel>
    </Grid>

</phone:PhoneApplicationPage>

Speech/SpeechRecognition.xaml.cs

/*
 * 演示如何通过自定义语法列表做语音识别，以及如何通过 SRGS 自定义语法做语音识别
 * 
 * 
 * 语音识别：用于在 app 内识别语音
 * 语音命令：用于在 app 外通过语音命令启动 app
 *  
 * 
 * 注：
 * 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> <Capability Name="ID_CAP_MICROPHONE" />
 * 2、安装语音识别器：设置 -> 语音 -> 在“语音语言”列表中安装指定的语音识别器，并启用语音识别服务
 * 3、SRGS - Speech Recognition Grammar Specification
 * 4、微软关于 SRGS 的说明：http://msdn.microsoft.com/en-us/library/hh361653
 * 5、W3C 关于 SRGS 的说明：http://www.w3.org/TR/speech-grammar/
 */

using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Recognition;

namespace Demo.Speech
{
    /// <summary>
    /// Demo page showing speech recognition against a custom phrase list (with the
    /// built-in recognition UI) and against a custom SRGS grammar (without UI).
    /// </summary>
    public partial class SpeechRecognition : PhoneApplicationPage
    {
        // HRESULT reported when the current recognizer does not support the requested language.
        private const uint UnsupportedLanguageHResult = 0x800455BC;

        public SpeechRecognition()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Recognizes one of a fixed list of phrases using the zh-CN recognizer and the
        /// built-in recognition UI, then shows the status, text and confidence.
        /// </summary>
        private async void btnDemo_Click(object sender, RoutedEventArgs e)
        {
            // Speech recognizer with UI.
            SpeechRecognizerUI speechRecognizerUI = new SpeechRecognizerUI();

            // Raised when an audio problem occurs during recognition.
            speechRecognizerUI.Recognizer.AudioProblemOccurred += Recognizer_AudioProblemOccurred;
            // Raised when the audio capture state changes.
            speechRecognizerUI.Recognizer.AudioCaptureStateChanged += Recognizer_AudioCaptureStateChanged;

            // InitialSilenceTimeout - recognition is aborted if only silence is received within this time.
            speechRecognizerUI.Recognizer.Settings.InitialSilenceTimeout = TimeSpan.FromSeconds(5.0);
            // EndSilenceTimeout - once recognition has started, this much silence ends it.
            speechRecognizerUI.Recognizer.Settings.EndSilenceTimeout = TimeSpan.FromSeconds(0.15);
            // BabbleTimeout - recognition is aborted if only noise is received within this time (0 disables it).
            speechRecognizerUI.Recognizer.Settings.BabbleTimeout = TimeSpan.FromSeconds(0.0);

            // Look up the Chinese recognizer once; FirstOrDefault avoids enumerating the
            // installed recognizers twice (Count() followed by First()).
            SpeechRecognizerInformation zhRecognizer =
                InstalledSpeechRecognizers.All.FirstOrDefault(recognizerInfo => recognizerInfo.Language == "zh-CN");

            if (zhRecognizer == null)
            {
                lblMsg.Text = "未安装中文语音识别器";
                return;
            }

            try
            {
                // Select the recognizer.
                speechRecognizerUI.Recognizer.SetRecognizer(zhRecognizer);

                // Phrase list to recognize.
                string[] phrases = { "xbox", "海贼王", "王磊" };
                speechRecognizerUI.Recognizer.Grammars.AddGrammarFromList("myWord", phrases);
                // Alternatives: SpeechPredefinedGrammar.Dictation (full-sentence dictation, local recognition)
                // or SpeechPredefinedGrammar.WebSearch (full-sentence dictation, network-based recognition)
                // via Grammars.AddGrammarFromPredefinedType.

                // Preload all grammars. This is inside the try block so that a load failure
                // is reported instead of escaping the async void handler.
                await speechRecognizerUI.Recognizer.PreloadGrammarsAsync();

                // Title shown on the listening page of the recognition UI.
                speechRecognizerUI.Settings.ListenText = "监听中。。。";

                // Example text shown on the listening page of the recognition UI.
                speechRecognizerUI.Settings.ExampleText = "精确识别：xbox, 海贼王, 王磊";

                // Whether the "did you say" page (listing multiple matches) and the "heard you say" page
                // read the recognized text aloud (only effective when audio confirmations are enabled
                // in the speech settings).
                speechRecognizerUI.Settings.ReadoutEnabled = true;

                // Whether to show the "heard you say" page with the final recognized text.
                speechRecognizerUI.Settings.ShowConfirmation = false;

                // Start recognition.
                SpeechRecognitionUIResult result = await speechRecognizerUI.RecognizeWithUIAsync();

                // Always show the status; only read the recognition result when recognition
                // succeeded, because other statuses (e.g. cancelled) may carry no result.
                string message = "识别状态: " + result.ResultStatus.ToString();
                if (result.ResultStatus == SpeechRecognitionUIStatus.Succeeded && result.RecognitionResult != null)
                {
                    message += Environment.NewLine;
                    message += "识别结果：" + result.RecognitionResult.Text;
                    message += Environment.NewLine;
                    message += "可信度级别: " + result.RecognitionResult.TextConfidence.ToString(); // Rejected, Low, Medium, High
                }
                lblMsg.Text = message;
            }
            catch (Exception ex)
            {
                ShowRecognitionError(ex, speechRecognizerUI.Recognizer);
            }
        }

        /// <summary>
        /// Shows the new audio capture state: Capturing or Inactive.
        /// </summary>
        void Recognizer_AudioCaptureStateChanged(SpeechRecognizer sender, SpeechRecognizerAudioCaptureStateChangedEventArgs args)
        {
            string state = args.State.ToString();
            RunOnUiThread(() => lblMsg.Text = "AudioCaptureStateChanged: " + state);
        }

        /// <summary>
        /// Shows the audio problem that occurred during recognition:
        /// TooLoud, TooQuiet, TooFast, TooSlow, TooNoisy, NoSignal or None.
        /// </summary>
        void Recognizer_AudioProblemOccurred(SpeechRecognizer sender, SpeechAudioProblemOccurredEventArgs args)
        {
            string problem = args.Problem.ToString();
            RunOnUiThread(() => lblMsg.Text = "AudioProblemOccurred: " + problem);
        }

        /// <summary>
        /// Recognizes speech against the SRGS grammar in Speech/SRGSGrammar.xml without any UI
        /// and shows the recognized text and confidence.
        /// </summary>
        /// <remarks>
        /// Microsoft SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
        /// W3C SRGS specification: http://www.w3.org/TR/speech-grammar/
        /// </remarks>
        private async void btnSRGS_Click(object sender, RoutedEventArgs e)
        {
            // Speech recognizer without UI.
            SpeechRecognizer speechRecognizer = new SpeechRecognizer();

            try
            {
                // Register the SRGS grammar (inside the try block so a failure is reported).
                Uri mySRGS = new Uri("ms-appx:///Speech/SRGSGrammar.xml", UriKind.Absolute);
                speechRecognizer.Grammars.AddGrammarFromUri("srgs", mySRGS);

                lblMsg.Text = "监听中。。。";
                lblMsg.Text += Environment.NewLine;

                // Start recognition.
                SpeechRecognitionResult result = await speechRecognizer.RecognizeAsync();

                // Show the recognition result.
                lblMsg.Text += "识别结果：" + result.Text;
                lblMsg.Text += Environment.NewLine;
                lblMsg.Text += "可信度级别: " + result.TextConfidence.ToString(); // Rejected, Low, Medium, High
            }
            catch (Exception ex)
            {
                ShowRecognitionError(ex, speechRecognizer);
            }
        }

        /// <summary>
        /// Reports a recognition failure on the output label, with a dedicated message
        /// when the recognizer does not support the requested language.
        /// </summary>
        private void ShowRecognitionError(Exception ex, SpeechRecognizer recognizer)
        {
            if ((uint)ex.HResult == UnsupportedLanguageHResult)
            {
                lblMsg.Text = "当前语音识别器不支持所请求的语言: " + recognizer.GetRecognizer().Language;
            }
            else
            {
                lblMsg.Text = ex.ToString();
            }
        }

        /// <summary>
        /// Runs the action immediately when already on the UI thread; otherwise queues it
        /// on the page Dispatcher.
        /// </summary>
        /// <remarks>
        /// NOTE(review): nothing in this file establishes which thread raises the recognizer
        /// events, so UI access from their handlers is marshalled defensively.
        /// </remarks>
        private void RunOnUiThread(Action action)
        {
            if (Dispatcher.CheckAccess())
            {
                action();
            }
            else
            {
                Dispatcher.BeginInvoke(action);
            }
        }
    }
}

3、演示如何通过语音命令启动 app，以及 app 启动后如何获取启动此 app 的语音命令的标识和内容
Speech/VoiceCommandDefinition.xml

<?xml version="1.0" encoding="utf-8"?>
<!-- Voice command definition for the zh-cn command set: two commands, both of which navigate to /Speech/VoiceCommands.xaml. -->
<VoiceCommands xmlns="http://schemas.microsoft.com/voicecommands/1.0">
  <CommandSet xml:lang="zh-cn">

    <!-- Command prefix; when it is not specified, the app name is used as the prefix -->
    <CommandPrefix>贪吃蛇</CommandPrefix>
    <!-- The voice listening window randomly shows voice command hints of different apps (here: 贪吃蛇 开始); when it is this app's turn, this one may be shown -->
    <Example>开始</Example>

    <Command Name="PlayGame">
      <!-- The voice listening window randomly shows voice command hints of different apps (here: 贪吃蛇 开始); when it is this app's turn, this one may be shown -->
      <Example>开始</Example>
      <!-- Phrase to listen for; the bracketed word is optional -->
      <ListenFor>[马上] 开始</ListenFor>
      <!-- Phrase to listen for; the bracketed word is optional -->
      <ListenFor>[马上] 启动</ListenFor>
      <!-- Text shown in the listening window just before the target app is launched (used as the TTS text when audio confirmations are enabled in the speech settings) -->
      <Feedback>准备启动</Feedback>
      <!-- Page to launch -->
      <Navigate Target="/Speech/VoiceCommands.xaml" />
    </Command>

    <Command Name="PlayLevel">
      <!-- The voice listening window randomly shows voice command hints of different apps (here: 贪吃蛇 从等级 2 开始); when it is this app's turn, this one may be shown -->
      <Example>从等级 2 开始</Example>
      <!-- Phrase to listen for; {number} refers to the PhraseList labelled "number" -->
      <ListenFor>从等级 {number} 开始</ListenFor>
      <!-- Text shown in the listening window just before the target app is launched (used as the TTS text when audio confirmations are enabled in the speech settings) -->
      <Feedback>正转到等级 {number}... </Feedback>
      <!-- Page to launch -->
      <Navigate Target="/Speech/VoiceCommands.xaml" />
    </Command>

    <!-- ListenFor and Feedback can reference this list through {number} -->
    <PhraseList Label="number">
      <Item>1</Item>
      <Item>2</Item>
      <Item>3</Item>
    </PhraseList>

  </CommandSet>
</VoiceCommands>

<!--
This definition recognizes (every phrase starts with the command prefix 贪吃蛇):
贪吃蛇开始, 贪吃蛇马上开始, 贪吃蛇启动, 贪吃蛇马上启动, 贪吃蛇从等级 1 开始, 贪吃蛇从等级 2 开始, 贪吃蛇从等级 3 开始

Visual Studio provides an item template for creating VoiceCommandDefinition files
Detailed VoiceCommands documentation: http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
-->

Speech/VoiceCommands.xaml

<!-- Page markup for the voice command demo; this page is the navigation target declared in VoiceCommandDefinition.xml. -->
<phone:PhoneApplicationPage
    x:Class="Demo.Speech.VoiceCommands"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait" Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">

    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">

            <!-- Shows usage instructions by default; the code-behind replaces the text with the voice command details when the page is opened by a voice command -->
            <TextBlock Name="lblMsg" TextWrapping="Wrap" Text="返回到开始屏幕，长按 windows 键，说出你的语音命令（语音命令的定义参见 VoiceCommandDefinition.xml）" />

        </StackPanel>
    </Grid>
    
</phone:PhoneApplicationPage>

Speech/VoiceCommands.xaml.cs

/*
 * 演示如何通过语音命令启动 app，以及 app 启动后如何获取启动此 app 的语音命令的标识和内容
 * 
 * 
 * 语音识别：用于在 app 内识别语音
 * 语音命令：用于在 app 外通过语音命令启动 app
 * 
 * 
 * 注：
 * 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> <Capability Name="ID_CAP_MICROPHONE" />
 * 2、关于 VoiceCommands 的详细说明参见：http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
 */

using System;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.VoiceCommands;
using System.Windows.Navigation;

namespace Demo.Speech
{
    /// <summary>
    /// Demo page that registers the app's voice command definition and, when the page
    /// is opened through a voice command, shows the command's name and recognized text.
    /// </summary>
    public partial class VoiceCommands : PhoneApplicationPage
    {
        public VoiceCommands()
        {
            InitializeComponent();

            this.Loaded += VoiceCommands_Loaded;
        }

        /// <summary>
        /// Registers the voice command definition file with the system once the page has loaded.
        /// </summary>
        private async void VoiceCommands_Loaded(object sender, RoutedEventArgs e)
        {
            try
            {
                // Register this app's voice command definition with the system.
                await VoiceCommandService.InstallCommandSetsFromFileAsync(
                    new Uri("ms-appx:///Speech/VoiceCommandDefinition.xml", UriKind.Absolute));

                // The installed command sets can be read and modified dynamically through
                // VoiceCommandService.InstalledCommandSets.
            }
            catch (Exception ex)
            {
                // An exception escaping an async void handler is unhandled, so report it here.
                // The message is appended so that voice command details already shown are kept.
                lblMsg.Text += Environment.NewLine;
                lblMsg.Text += "语音命令定义安装失败: " + ex.Message;
            }
        }

        /// <summary>
        /// Shows the voice command name and recognized text when the navigation URL carries them.
        /// </summary>
        protected override void OnNavigatedTo(NavigationEventArgs e)
        {
            // When launched by a voice command the URL looks like:
            // /Speech/VoiceCommands.xaml?voiceCommandName=PlayGame&reco=%E8%B4%AA%E5%90%83%E8%9B%87%20%E5%BC%80%E5%A7%8B

            string commandName;
            if (NavigationContext.QueryString.TryGetValue("voiceCommandName", out commandName))
            {
                // "reco" is read defensively: indexing a missing key would throw KeyNotFoundException.
                string recognizedText;
                if (!NavigationContext.QueryString.TryGetValue("reco", out recognizedText))
                {
                    recognizedText = string.Empty;
                }

                lblMsg.Text = "语音命令的标识: " + commandName;
                lblMsg.Text += Environment.NewLine;
                lblMsg.Text += "语音命令的内容: " + recognizedText;
            }

            base.OnNavigatedTo(e);
        }
    }
}