using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Speech.Synthesis;
using System.Speech.AudioFormat;
using System.IO;

namespace TTSApp
{
    /// <summary>
    /// Main window of the text-to-speech demo: reads the text box aloud, highlights the
    /// fragment being spoken, shows a mouth-shape animation driven by viseme events and
    /// can render the speech to a wave file.
    /// </summary>
    public partial class FrmMain : Form
    {
        private List<AudioFormatInfo> formatList = new List<AudioFormatInfo>(); // audio formats bound to cbFormat
        private SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();  // speech synthesizer
        private PictureBox pic1 = new PictureBox();                             // shows the mouth-shape animation
        private PictureBox pic2 = new PictureBox();                             // transparent overlay hosted inside pic1
        private AudioFormatInfo formatInfo;                                     // format currently selected in cbFormat
        private string text;                                                    // part of the spoken text not yet reported by SpeakProgress
        private bool showEvents;                                                // mirrors chkAllEvent.Checked

        /// <summary>
        /// Builds the form, wires the synthesizer events and prepares the picture boxes
        /// used for the mouth-shape animation.
        /// </summary>
        public FrmMain()
        {
            InitializeComponent();

            speechSynthesizer.SpeakProgress += new EventHandler<SpeakProgressEventArgs>(speechSynthesizer_SpeakProgress);    // speaking progress
            speechSynthesizer.VisemeReached += new EventHandler<VisemeReachedEventArgs>(speechSynthesizer_VisemeReached);    // mouth-shape animation
            speechSynthesizer.SpeakCompleted += new EventHandler<SpeakCompletedEventArgs>(speechSynthesizer_SpeakCompleted); // speaking finished
            speechSynthesizer.Rate = this.tbRate.Value;     // initial rate taken from the track bar
            speechSynthesizer.Volume = this.tbVolume.Value; // initial volume taken from the track bar

            // SpeechSynthesizer is IDisposable; release it together with the form.
            this.FormClosed += new FormClosedEventHandler(DisposeSynthesizerOnClosed);

            // Picture boxes for the viseme animation: pic2 is layered on top of pic1.
            pic1.Size = new Size(128, 128);
            pic1.BackgroundImage = imageList.Images[14];
            pic2.Size = new Size(128, 128);
            pic2.BackColor = Color.Transparent;
            pic1.Controls.Add(pic2);
            pic1.Location = new Point(5, 15);
            this.gbMouth.Controls.Add(pic1);

            this.showEvents = this.chkAllEvent.Checked;
        }

        /// <summary>
        /// Disposes the speech synthesizer once the form has been closed.
        /// </summary>
        private void DisposeSynthesizerOnClosed(object sender, FormClosedEventArgs e)
        {
            speechSynthesizer.Dispose();
        }

        /// <summary>
        /// Form load: fills the voice and audio-format lists.
        /// </summary>
        private void FrmMain_Load(object sender, EventArgs e)
        {
            BindVoiceList();
            BindFormatList();
        }

        /// <summary>
        /// Binds the names of the installed voices to the voice combo box.
        /// </summary>
        private void BindVoiceList()
        {
            // Disabled voices cannot be selected, so they are not offered.
            this.cbVoive.DataSource = (from item in speechSynthesizer.GetInstalledVoices()
                                       where item.Enabled
                                       select item.VoiceInfo.Name).ToList();
        }

        /// <summary>
        /// Builds the list of audio formats (every sample rate x bit depth x channel
        /// combination) and binds it to the format combo box.
        /// </summary>
        private void BindFormatList()
        {
            // NOTE(review): the "11/22/44kHz" entries keep the original 11000/22000/44000 Hz
            // values rather than the conventional 11025/22050/44100 - confirm which is intended.
            int[] sampleRates = { 8000, 11000, 12000, 16000, 22000, 24000, 32000, 44000, 48000 };
            AudioBitsPerSample[] bitDepths = { AudioBitsPerSample.Eight, AudioBitsPerSample.Sixteen };
            AudioChannel[] channels = { AudioChannel.Mono, AudioChannel.Stereo };

            // Generating the table keeps each display name and its parameters in sync
            // (the hand-written list created both "32kHz ... Stereo" entries as Mono).
            foreach (int rate in sampleRates)
            {
                foreach (AudioBitsPerSample bits in bitDepths)
                {
                    foreach (AudioChannel channel in channels)
                    {
                        // Produces e.g. "8kHz 16Bit Stereo".
                        string name = (rate / 1000) + "kHz " + (int)bits + "Bit " + channel;
                        formatList.Add(new AudioFormatInfo(name, rate, bits, channel));
                    }
                }
            }

            this.cbFormat.DataSource = formatList;
            // NOTE(review): "FromatName" must match the property name declared on AudioFormatInfo
            // (not visible here); it looks like a misspelling of "FormatName" - confirm before renaming.
            this.cbFormat.DisplayMember = "FromatName";
            this.formatInfo = this.cbFormat.SelectedItem as AudioFormatInfo;
        }

        /// <summary>
        /// Speaking progress: selects the fragment currently being spoken in the text box.
        /// </summary>
        private void speechSynthesizer_SpeakProgress(object sender, SpeakProgressEventArgs e)
        {
            string selectedText = e.Text;
            if (text == null || selectedText == null)
            {
                return; // nothing is being tracked
            }

            int startIndex = text.IndexOf(selectedText, StringComparison.Ordinal);
            if (startIndex < 0)
            {
                return; // fragment not found in the remaining text; keep the current selection
            }

            int length = selectedText.Length;
            // Number of characters already consumed from the front of the text box content.
            int lastLength = this.txtContent.Text.Length - text.Length;
            int selectionStart = startIndex + lastLength;

            // A negative start is possible when the text box was edited while speaking.
            if (selectionStart >= 0)
            {
                this.txtContent.Focus();
                this.txtContent.Select(selectionStart, length);
            }

            // Continue the search after this fragment so repeated words resolve in order.
            text = text.Substring(startIndex + length);
        }

        /// <summary>
        /// Viseme reached: updates the two picture boxes of the mouth-shape animation.
        /// </summary>
        private void speechSynthesizer_VisemeReached(object sender, VisemeReachedEventArgs e)
        {
            if (showEvents)
            {
                this.txtMsg.AppendText("Viseme\r\n");
            }

            int viseme = e.Viseme;
            // Viseme ids above 11 reuse the images of ids 0 and up (id - 12).
            viseme = viseme > 11 ? viseme - 12 : viseme;
            // Even ids use image 12 for the overlay, odd ids use image 13.
            int index = viseme % 2 == 0 ? 12 : 13;
            this.pic1.Image = imageList.Images[viseme];
            this.pic2.Image = imageList.Images[index];
        }

        /// <summary>
        /// Speaking completed: clears the animation, detaches the output and selects all text.
        /// </summary>
        private void speechSynthesizer_SpeakCompleted(object sender, SpeakCompletedEventArgs e)
        {
            this.pic1.Image = null;
            this.pic2.Image = null;
            speechSynthesizer.SetOutputToNull();
            this.txtContent.SelectAll();
        }

        /// <summary>
        /// Switches the synthesizer to the voice chosen in the voice combo box.
        /// </summary>
        private void cbVoive_SelectedIndexChanged(object sender, EventArgs e)
        {
            string voiceName = this.cbVoive.Text;
            if (string.IsNullOrEmpty(voiceName))
            {
                return; // no voice listed; SelectVoice rejects an empty name
            }

            speechSynthesizer.SelectVoice(voiceName);
        }

        /// <summary>
        /// Remembers the audio format chosen in the format combo box.
        /// </summary>
        private void cbFormat_SelectedIndexChanged(object sender, EventArgs e)
        {
            this.formatInfo = this.cbFormat.SelectedItem as AudioFormatInfo;
        }

        /// <summary>
        /// Applies the speaking rate from the rate track bar.
        /// </summary>
        private void tbRate_Scroll(object sender, EventArgs e)
        {
            speechSynthesizer.Rate = this.tbRate.Value;
        }

        /// <summary>
        /// Applies the volume from the volume track bar.
        /// </summary>
        private void tbVolume_Scroll(object sender, EventArgs e)
        {
            speechSynthesizer.Volume = this.tbVolume.Value;
        }

        /// <summary>
        /// Loads a text file (.txt) chosen by the user into the text box.
        /// </summary>
        private void btnOpenFile_Click(object sender, EventArgs e)
        {
            if (openFileDialog.ShowDialog() == DialogResult.OK)
            {
                this.txtContent.Text = File.ReadAllText(openFileDialog.FileName, Encoding.Default);
            }
        }

        /// <summary>
        /// Speaks the content of the text box on the default audio device.
        /// </summary>
        private void btnSpeak_Click(object sender, EventArgs e)
        {
            this.txtMsg.AppendText("Speak\r\n");
            text = this.txtContent.Text;
            speechSynthesizer.SetOutputToDefaultAudioDevice();
            speechSynthesizer.SpeakAsync(text);
        }

        /// <summary>
        /// Toggles between pausing and resuming the synthesizer; the button caption holds the state.
        /// </summary>
        private void btnPause_Click(object sender, EventArgs e)
        {
            if (this.btnPause.Text == "Pause")
            {
                this.txtMsg.AppendText("Pause\r\n");
                speechSynthesizer.Pause();
                this.btnPause.Text = "Resume";
            }
            else
            {
                this.txtMsg.AppendText("Resume\r\n");
                speechSynthesizer.Resume();
                this.btnPause.Text = "Pause";
            }
        }

        /// <summary>
        /// Cancels all queued and running speak operations.
        /// </summary>
        private void btnStop_Click(object sender, EventArgs e)
        {
            this.txtMsg.AppendText("Stop\r\n");
            speechSynthesizer.SpeakAsyncCancelAll();
        }

        /// <summary>
        /// Renders the content of the text box to a wave file chosen by the user.
        /// </summary>
        private void btnSave_Click(object sender, EventArgs e)
        {
            if (saveFileDialog.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            // NOTE(review): cancellation is requested asynchronously; confirm the synthesizer
            // accepts an output change immediately after this call.
            if (speechSynthesizer.State == SynthesizerState.Speaking)
            {
                speechSynthesizer.SpeakAsyncCancelAll();
            }

            if (formatInfo != null)
            {
                speechSynthesizer.SetOutputToWaveFile(saveFileDialog.FileName, formatInfo);
            }
            else
            {
                // No format selected: fall back to the overload without an explicit format.
                speechSynthesizer.SetOutputToWaveFile(saveFileDialog.FileName);
            }

            // Reset the progress tracker; SpeakProgress is raised for this operation as well
            // and would otherwise see a null or stale remainder.
            text = this.txtContent.Text;
            speechSynthesizer.SpeakAsync(text);
        }

        /// <summary>
        /// Plays a wave file chosen by the user through the synthesizer.
        /// </summary>
        private void btnSpeakWav_Click(object sender, EventArgs e)
        {
            if (openWav.ShowDialog() == DialogResult.OK)
            {
                PromptBuilder pb = new PromptBuilder();
                pb.AppendAudio(openWav.FileName);
                speechSynthesizer.SetOutputToDefaultAudioDevice();
                speechSynthesizer.SpeakAsync(pb);
            }
        }

        /// <summary>
        /// Reset: logs the action and moves the caret to the start of the text box.
        /// </summary>
        private void btnReset_Click(object sender, EventArgs e)
        {
            this.txtMsg.AppendText("Reset\r\n");
            this.txtContent.Focus();
            this.txtContent.Select(0, 0);
        }

        /// <summary>
        /// Toggles whether viseme events are written to the message box.
        /// </summary>
        private void chkAllEvent_CheckedChanged(object sender, EventArgs e)
        {
            this.showEvents = this.chkAllEvent.Checked;
        }
    }
}
/// <summary>
/// Binds the names of the installed voices to the voice combo box.
/// </summary>
private void BindVoiceList()
{
    // Disabled voices cannot be selected, so they are not offered.
    this.cbVoive.DataSource = (from item in speechSynthesizer.GetInstalledVoices()
                               where item.Enabled
                               select item.VoiceInfo.Name).ToList();
}
/// <summary>
/// Switches the synthesizer to the voice chosen in the voice combo box.
/// </summary>
private void cbVoive_SelectedIndexChanged(object sender, EventArgs e)
{
    string voiceName = this.cbVoive.Text;
    if (string.IsNullOrEmpty(voiceName))
    {
        return; // no voice listed; SelectVoice rejects an empty name
    }

    speechSynthesizer.SelectVoice(voiceName);
}
// Signature listing (declarations only, no bodies) of the SetOutputTo* methods; the form code
// calls SetOutputToDefaultAudioDevice, SetOutputToNull and SetOutputToWaveFile on its SpeechSynthesizer.
public void SetOutputToAudioStream(Stream audioDestination, SpeechAudioFormatInfo formatInfo); public void SetOutputToDefaultAudioDevice(); public void SetOutputToNull(); public void SetOutputToWaveFile(string path); public void SetOutputToWaveFile(string path, SpeechAudioFormatInfo formatInfo); public void SetOutputToWaveStream(Stream audioDestination);
A viseme is the basic position of the mouth and face when pronouncing a phoneme. Visemes are visual representations of phonemes.
System.Speech supports 21 visemes for US English, each of which corresponds to one or more phonemes. VisemeReached events are raised when a new phoneme reached has a different corresponding viseme than the previous phoneme reached. Since some visemes represent more than one phoneme, a VisemeReached event is not generated if the next phoneme reached corresponds to the same viseme as the previous phoneme. For example, for the spoken words “this zone”, a PhonemeReached event is raised for the “s” in “this” and the “z” in “zone”. However, a VisemeReached event is not raised for the “z” in “zone” because it corresponds to the same viseme as the “s” in “this”.
The following is a list of the 21 SAPI phonemes and phoneme groups that correspond to a viseme in US English.
0 silence
1 ae, ax, ah
2 aa
3 ao
4 ey, eh, uh
5 er
6 y, iy, ih, ix
7 w, uw
8 ow
9 aw
10 oy
11 ay
12 h
13 r
14 l
15 s, z
16 sh, ch, jh, zh
17 th, dh
18 f, v
19 d, t, n
20 k, g, ng
21 p, b, m
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步