ROS语音识别
一、语音识别包
1、安装
$ sudo apt-get install gstreamer0.10-pocketsphinx $ sudo apt-get install ros-indigo-audio-common //我安装的是indigo版本的 $ sudo apt-get install libasound2
$ sudo apt-get install gstreamer0.10-gconf
然后来安装ROS包:
sudo apt-get install ros-indigo-pocketsphinx
2. 安装完成后我们就可以运行测试了。
首先,插入你的麦克风设备,然后在系统设置里测试麦克风是否有语音输入。
然后,运行包中的测试程序:
$ roslaunch pocketsphinx robocup.launch
我们也可以直接看ROS最后发布的结果消息:
$ rostopic echo /recognizer/output
二、语音库
1、查看语音库
$ roscd rbx1_speech/config $ more nav_commands.txt
这就是需要添加的文本,我们也可以修改其中的某些文本,改成自己需要的。
然后我们要把这个文件在线生成语音信息和库文件,这一步需要登陆网站http://www.speech.cs.cmu.edu/tools/lmtool-new.html,根据网站的提示上传文件,然后在线编译生成库文件。
上传nav_commands.txt文件
把下载的文件都解压放在rbx1_speech包的config文件夹下。我们可以给这些文件改个名字:
rename -f 's/593/nav_commands/' *
在rbx1_speech/launch文件夹下看看voice_nav_commands.launch这个文件:
<launch> <node name="recognizer" pkg="pocketsphinx" type="recognizer.py" output="screen"> <param name="lm" value="$(find rbx1_speech)/config/nav_commands.lm"/> <param name="dict" value="$(find rbx1_speech)/config/nav_commands.dic"/> </node> </launch>
可以看到,这个launch文件在运行recognizer.py节点的时候使用了我们生成的语音识别库和文件参数,这样就可以实用我们自己的语音库来进行语音识别了。
通过之前的命令来测试一下效果如何吧:
$ roslaunch rbx1_speech voice_nav_commands.launch
$ rostopic echo /recognizer/output
三、语音控制
1、机器人控制节点
在pocketsphinx包中本身有一个语音控制发布Twist消息的例程voice_cmd_vel.py,rbx1_speech包对其进行了一些简化修改,在nodes文件夹里可以查看voice_nav.py文件:
#!/usr/bin/env python """ voice_nav.py - Version 1.1 2013-12-20 Allows controlling a mobile base using simple speech commands. Based on the voice_cmd_vel.py script by Michael Ferguson in the pocketsphinx ROS package. See http://www.ros.org/wiki/pocketsphinx """ import rospy from geometry_msgs.msg import Twist from std_msgs.msg import String from math import copysign class VoiceNav: def __init__(self): rospy.init_node('voice_nav') rospy.on_shutdown(self.cleanup) # Set a number of parameters affecting the robot's speed self.max_speed = rospy.get_param("~max_speed", 0.4) self.max_angular_speed = rospy.get_param("~max_angular_speed", 1.5) self.speed = rospy.get_param("~start_speed", 0.1) self.angular_speed = rospy.get_param("~start_angular_speed", 0.5) self.linear_increment = rospy.get_param("~linear_increment", 0.05) self.angular_increment = rospy.get_param("~angular_increment", 0.4) # We don't have to run the script very fast self.rate = rospy.get_param("~rate", 5) r = rospy.Rate(self.rate) # A flag to determine whether or not voice control is paused self.paused = False # Initialize the Twist message we will publish. self.cmd_vel = Twist() # Publish the Twist message to the cmd_vel topic self.cmd_vel_pub = rospy.Publisher('cmd_vel', Twist, queue_size=5) # Subscribe to the /recognizer/output topic to receive voice commands. rospy.Subscriber('/recognizer/output', String, self.speech_callback) # A mapping from keywords or phrases to commands self.keywords_to_command = {'stop': ['stop', 'halt', 'abort', 'kill', 'panic', 'off', 'freeze', 'shut down', 'turn off', 'help', 'help me'], 'slower': ['slow down', 'slower'], 'faster': ['speed up', 'faster'], 'forward': ['forward', 'ahead', 'straight'], 'backward': ['back', 'backward', 'back up'], 'rotate left': ['rotate left'], 'rotate right': ['rotate right'], 'turn left': ['turn left'], 'turn right': ['turn right'], 'quarter': ['quarter speed'], 'half': ['half speed'], 'full': ['full speed'], 'pause': ['pause speech'], 'continue': ['continue speech']} rospy.loginfo("Ready to receive voice commands") # We have to keep publishing the cmd_vel message if we want the robot to keep moving. while not rospy.is_shutdown(): self.cmd_vel_pub.publish(self.cmd_vel) r.sleep() def get_command(self, data): # Attempt to match the recognized word or phrase to the # keywords_to_command dictionary and return the appropriate # command for (command, keywords) in self.keywords_to_command.iteritems(): for word in keywords: if data.find(word) > -1: return command def speech_callback(self, msg): # Get the motion command from the recognized phrase command = self.get_command(msg.data) # Log the command to the screen rospy.loginfo("Command: " + str(command)) # If the user has asked to pause/continue voice control, # set the flag accordingly if command == 'pause': self.paused = True elif command == 'continue': self.paused = False # If voice control is paused, simply return without # performing any action if self.paused: return # The list of if-then statements should be fairly # self-explanatory if command == 'forward': self.cmd_vel.linear.x = self.speed self.cmd_vel.angular.z = 0 elif command == 'rotate left': self.cmd_vel.linear.x = 0 self.cmd_vel.angular.z = self.angular_speed elif command == 'rotate right': self.cmd_vel.linear.x = 0 self.cmd_vel.angular.z = -self.angular_speed elif command == 'turn left': if self.cmd_vel.linear.x != 0: self.cmd_vel.angular.z += self.angular_increment else: self.cmd_vel.angular.z = self.angular_speed elif command == 'turn right': if self.cmd_vel.linear.x != 0: self.cmd_vel.angular.z -= self.angular_increment else: self.cmd_vel.angular.z = -self.angular_speed elif command == 'backward': self.cmd_vel.linear.x = -self.speed self.cmd_vel.angular.z = 0 elif command == 'stop': # Stop the robot! Publish a Twist message consisting of all zeros. self.cmd_vel = Twist() elif command == 'faster': self.speed += self.linear_increment self.angular_speed += self.angular_increment if self.cmd_vel.linear.x != 0: self.cmd_vel.linear.x += copysign(self.linear_increment, self.cmd_vel.linear.x) if self.cmd_vel.angular.z != 0: self.cmd_vel.angular.z += copysign(self.angular_increment, self.cmd_vel.angular.z) elif command == 'slower': self.speed -= self.linear_increment self.angular_speed -= self.angular_increment if self.cmd_vel.linear.x != 0: self.cmd_vel.linear.x -= copysign(self.linear_increment, self.cmd_vel.linear.x) if self.cmd_vel.angular.z != 0: self.cmd_vel.angular.z -= copysign(self.angular_increment, self.cmd_vel.angular.z) elif command in ['quarter', 'half', 'full']: if command == 'quarter': self.speed = copysign(self.max_speed / 4, self.speed) elif command == 'half': self.speed = copysign(self.max_speed / 2, self.speed) elif command == 'full': self.speed = copysign(self.max_speed, self.speed) if self.cmd_vel.linear.x != 0: self.cmd_vel.linear.x = copysign(self.speed, self.cmd_vel.linear.x) if self.cmd_vel.angular.z != 0: self.cmd_vel.angular.z = copysign(self.angular_speed, self.cmd_vel.angular.z) else: return self.cmd_vel.linear.x = min(self.max_speed, max(-self.max_speed, self.cmd_vel.linear.x)) self.cmd_vel.angular.z = min(self.max_angular_speed, max(-self.max_angular_speed, self.cmd_vel.angular.z)) def cleanup(self): # When shutting down be sure to stop the robot! twist = Twist() self.cmd_vel_pub.publish(twist) rospy.sleep(1) if __name__=="__main__": try: VoiceNav() rospy.spin() except rospy.ROSInterruptException: rospy.loginfo("Voice navigation terminated.")
2、仿真测试
和前面一样,我们在rviz中进行仿真测试。
首先是运行一个机器人模型:
$ roslaunch rbx1_bringup fake_turtlebot.launch
然后打开rviz:
$ rosrun rviz rviz -d `rospack find rbx1_nav`/sim.rviz
最后就是机器人的控制节点了:
$ roslaunch rbx1_speech voice_nav_commands.launch 再打开语音识别的节点:
$ roslaunch rbx1_speech turtlebot_voice_nav.launch 最后就是机器人的控制节点了:
OK,打开上面这一堆的节点之后,就可以开始了。可以使用的命令如下:
可能是英语太差吧,很难识别;
四、播放语音
运行下面的命令:
$ rosrun sound_play soundplay_node.py $ rosrun sound_play say.py "Greetings Humans. Take me to your leader."
有没有听见声音!ROS通过识别我们输入的文本,让机器人读了出来。发出这个声音的人叫做kal_diphone,如果不喜欢,我们也可以换一个人来读:
$ sudo apt-get install festvox-don $ rosrun sound_play say.py "Welcome to the future" voice_don_diphone
哈哈,这回换了一个人吧,好吧,这不也是我们的重点。
在rbx1_speech/nodes文件夹中有一个让机器人说话的节点talkback.py:
#!/usr/bin/env python """ talkback.py - Version 1.1 2013-12-20 Use the sound_play client to say back what is heard by the pocketsphinx recognizer. Created for the Pi Robot Project: http://www.pirobot.org Copyright (c) 2012 Patrick Goebel. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.5 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details at: http://www.gnu.org/licenses/gpl.htmlPoint """ import rospy from std_msgs.msg import String from sound_play.libsoundplay import SoundClient import sys class TalkBack: def __init__(self, script_path): rospy.init_node('talkback') rospy.on_shutdown(self.cleanup) # Set the default TTS voice to use self.voice = rospy.get_param("~voice", "voice_don_diphone") # Set the wave file path if used self.wavepath = rospy.get_param("~wavepath", script_path + "/../sounds") # Create the sound client object self.soundhandle = SoundClient() # Wait a moment to let the client connect to the # sound_play server rospy.sleep(1) # Make sure any lingering sound_play processes are stopped. self.soundhandle.stopAll() # Announce that we are ready for input self.soundhandle.playWave(self.wavepath + "/R2D2a.wav") rospy.sleep(1) self.soundhandle.say("Ready", self.voice) rospy.loginfo("Say one of the navigation commands...") # Subscribe to the recognizer output and set the callback function rospy.Subscriber('/recognizer/output', String, self.talkback) def talkback(self, msg): # Print the recognized words on the screen rospy.loginfo(msg.data) # Speak the recognized words in the selected voice self.soundhandle.say(msg.data, self.voice) # Uncomment to play one of the built-in sounds #rospy.sleep(2) #self.soundhandle.play(5) # Uncomment to play a wave file #rospy.sleep(2) #self.soundhandle.playWave(self.wavepath + "/R2D2a.wav") def cleanup(self): self.soundhandle.stopAll() rospy.loginfo("Shutting down talkback node...") if __name__=="__main__": try: TalkBack(sys.path[0]) rospy.spin() except rospy.ROSInterruptException: rospy.loginfo("Talkback node terminated.")
我们来运行看一下效果:
$ roslaunch rbx1_speech talkback.launch