1、用户调用Nova的rescue函数
nova/virt/ironic/driver.py class IronicDriver(virt_driver.ComputeDriver): ...... ...... #导入ironicclient模块 def __init__(self, virtapi, read_only=False): super(IronicDriver, self).__init__(virtapi) global ironic if ironic is None: ironic = importutils.import_module('ironicclient') ...... self.ironicclient = client_wrapper.IronicClientWrapper() def spawn(self, context, instance, image_meta, injected_files, admin_password, allocations, network_info=None, block_device_info=None): ...... #调用ironicclient.call方法,触发节点部署 try: self.ironicclient.call("node.set_provision_state", node_uuid, ironic_states.ACTIVE, configdrive=configdrive_value) ...... try: ##Virt驱动程序在等待provision_state更改时循环,并根据需要更新Nova状态 timer.start(interval=CONF.ironic.api_retry_interval).wait() LOG.info('Successfully provisioned Ironic node %s', node.uuid, instance=instance)
ironic/api/controllers/v1/node.py #Ironic API接收set_provision_state调用,并执行do_node_rescue RPC调用 class NodeStatesController(rest.RestController): def provision(self, node_ident, target, configdrive=None, clean_steps=None, rescue_password=None): ..... elif (target == ir_states.VERBS['rescue']): if not (rescue_password and rescue_password.strip()): msg = (_('A non-empty "rescue_password" is required when ' 'setting target provision state to %s') % ir_states.VERBS['rescue']) raise wsme.exc.ClientSideError( msg, status_code=http_client.BAD_REQUEST) pecan.request.rpcapi.do_node_rescue( pecan.request.context, rpc_node.uuid, rescue_password, topic)
ironic/conductor/manager.py class ConductorManager(base_manager.BaseConductorManager): ...... def do_node_rescue(self, context, node_id, rescue_password): ...... #保存节点的救援密码 instance_info = node.instance_info instance_info['rescue_password'] = rescue_password node.instance_info = instance_info node.save()#Ironic conductor在instance_info中设置了救援密码并将通知给相应的驱动 try: task.driver.power.validate(task) task.driver.rescue.validate(task) task.driver.network.validate(task) try: task.process_event( 'rescue', callback=self._spawn_worker, call_args=(self._do_node_rescue, task),#内部RPC方法来救援现有的节点部署 err_handler=utils.spawn_rescue_error_handler) def _do_node_rescue(self, task): ...... try: next_state = task.driver.rescue.rescue(task) if next_state == states.RESCUEWAIT: task.process_event('wait') elif next_state == states.RESCUE: task.process_event('done')
ironic/drivers/modules/agent.py class AgentRescue(base.RescueInterface): ..... #在节点上启动一个救援ramdisk def rescue(self, task): #重置电源状态 manager_utils.node_power_action(task, states.POWER_OFF) #清理实例 task.driver.boot.clean_up_instance(task) #取消节点的租户网络 task.driver.network.unconfigure_tenant_networks(task) #为每个端口创建neutron端口以启动救援虚拟磁盘 task.driver.network.add_rescuing_network(task) if CONF.agent.manage_agent_boot: ramdisk_opts = deploy_utils.build_agent_options(task.node) #使用PXE准备Ironic ramdisk的引导 task.driver.boot.prepare_ramdisk(task, ramdisk_opts) #重置电源状态为POWER_ON manager_utils.node_power_action(task, states.POWER_ON) return states.RESCUEWAIT
ironic/drivers/modules/pxe.py class PXEBoot(base.BootInterface): ...... def prepare_ramdisk(self, task, ramdisk_params): node = task.node mode = deploy_utils.rescue_or_deploy_mode(node) if CONF.pxe.ipxe_enabled: #将iPXE引导脚本呈现到HTTP根目录 pxe_utils.create_ipxe_boot_script() dhcp_opts = pxe_utils.dhcp_options_for_instance(task)#检索DHCP PXE启动选项 provider = dhcp_factory.DHCPFactory() provider.update_dhcp(task, dhcp_opts)#发送或更新此节点的DHCP BOOT选项 pxe_info = _get_image_info(node, mode=mode)#为救援镜像生成TFTP文件的路径 manager_utils.node_set_boot_device(task, boot_devices.PXE, persistent=persistent) if CONF.pxe.ipxe_enabled and CONF.pxe.ipxe_use_swift: kernel_label = '%s_kernel' % mode ramdisk_label = '%s_ramdisk' % mode pxe_info.pop(kernel_label, None) pxe_info.pop(ramdisk_label, None) if pxe_info: _cache_ramdisk_kernel(task.context, node, pxe_info)
IPA与ironic-conductor交互:Agent ramdisk启动后,回调/v1/lookup获取节点信息,并发送心跳
ironic/drivers/modules/agent_base_vendor.py class HeartbeatMixin(object): ...... def heartbeat(self, task, callback_url, agent_version): ...... try: ..... elif (node.provision_state == states.RESCUEWAIT): msg = _('Node failed to perform rescue operation.') self._finalize_rescue(task) def _finalize_rescue(self, task): node = task.node try: result = self._client.finalize_rescue(node)
ironic/drivers/modules/agent_client.py class AgentClient(object): #指示虚拟磁盘完成救援模式的进入 def finalize_rescue(self, node): #根据config drive和rescue password调用finalize_rescue(RESCUEWAIT -> RESCUING),向ipa传入rescue_password rescue_pass = node.instance_info.get('rescue_password') params = {'rescue_password': rescue_pass} return self._command(node=node, method='rescue.finalize_rescue', params=params) def _command(self, node, method, params, wait=False): #向ipa发送命令 url = self._get_command_url(node) body = self._get_command_body(method, params) request_params = { 'wait': str(wait).lower() try: response = self.session.post(url, params=request_params, data=body)
ironic_python_agent/extensions/rescue.py PASSWORD_FILE = '/etc/ipa-rescue-config/ipa-rescue-password' class RescueExtension(base.BaseAgentExtension): def finalize_rescue(self, rescue_password=""): self.write_rescue_password(rescue_password) self.agent.serve_api = False #关闭api接口 return def write_rescue_password(self, rescue_password=""): LOG.debug('Writing hashed rescue password to %s', PASSWORD_FILE) salt = self.make_salt() hashed_password = crypt.crypt(rescue_password, salt) try: with open(PASSWORD_FILE, 'w') as f: f.write(hashed_password)#把救援密码写入到/etc/ipa-rescue-config/ipa-rescue-password
ironic/drivers/modules/agent_base_vendor.py class HeartbeatMixin(object): #调用ramdisk来准备救援模式并验证结果 def _finalize_rescue(self, task): node = task.node try: result = self._client.finalize_rescue(node) task.process_event('resume')#恢复node的状态 task.driver.rescue.clean_up(task)#清理此节点的部署环境 task.driver.network.configure_tenant_networks(task)#将网络调整到之前的租户网络 task.process_event('done')#返回task状态为done
但谈何容易。