clean动作只有在provide和delete阶段才会触发
从代码分析:
对节点执行的node provide/deleted/clean会先发送到ironicclient ironicclient.call("node.set_provision_state", node.uuid,"deleted") class Client(object): self.node = node.NodeManager(self.http_client) class NodeManager(base.CreateManager): def set_provision_state(self, node_uuid, state, configdrive=None, cleansteps=None, rescue_password=None) 在这个函数里面会构造一个http请求,包含一个url和一个body: url = /v1/nodes/node_id/states/provision patch = {'target': deleted} 然后发送给ironic-api
ironic/api/controllers/v1/__init__.py ------>>> class Controller(rest.RestController): nodes = node.NodesController() ports = port.PortsController() portgroups = portgroup.PortgroupsController() chassis = chassis.ChassisController() drivers = driver.DriversController() volume = volume.VolumeController() lookup = ramdisk.LookupController() heartbeat = ramdisk.HeartbeatController() ironic/api/controllers/v1/node.py ---->>> class NodesController(rest.RestController): states = NodeStatesController() ironic/api/controllers/v1/node.py ---->>> class NodeStatesController(rest.RestController): _custom_actions = { 'power': ['PUT'], 'provision': ['PUT'], 'raid': ['PUT'], } def provision(self, node_ident, target, configdrive=None, clean_steps=None, rescue_password=None): ...... ...... elif target == ir_states.DELETED: pecan.request.rpcapi.do_node_tear_down( pecan.request.context, rpc_node.uuid, topic) elif target == ir_states.VERBS['clean']: if not clean_steps: msg = (_('"clean_steps" is required when setting target ' 'provision state to %s') % ir_states.VERBS['clean']) raise wsme.exc.ClientSideError( msg, status_code=http_client.BAD_REQUEST) _check_clean_steps(clean_steps) pecan.request.rpcapi.do_node_clean( pecan.request.context, rpc_node.uuid, clean_steps, topic) elif target in PROVISION_ACTION_STATES: pecan.request.rpcapi.do_provisioning_action( pecan.request.context, rpc_node.uuid, target, topic) ironic/conductor/manager.py ----->>> class ConductorManager(base_manager.BaseConductorManager): ...... ...... def do_node_tear_down(self, context, node_id): ...... try: task.process_event( 'delete', callback=self._spawn_worker, call_args=(self._do_node_tear_down, task, task.node.provision_state), err_handler=utils.provisioning_error_handler) def do_node_clean(self, context, node_id, clean_steps): ...... 
try: task.process_event( 'clean', callback=self._spawn_worker, call_args=(self._do_node_clean, task, clean_steps), err_handler=utils.provisioning_error_handler, target_state=states.MANAGEABLE) def do_provisioning_action(self, context, node_id, action): ...... with task_manager.acquire(context, node_id, shared=False, purpose='provision action %s' % action) as task: node = task.node if (action == states.VERBS['provide'] and node.provision_state == states.MANAGEABLE): task.process_event( 'provide', callback=self._spawn_worker, call_args=(self._do_node_clean, task), err_handler=utils.provisioning_error_handler) return 最终都会调用到ironic-conductor的def _do_node_clean()函数,执行清理 ironic/conductor/manager.py ----->>> class ConductorManager(base_manager.BaseConductorManager): def _do_node_clean(self, task, clean_steps=None): node = task.node manual_clean = clean_steps is not None clean_type = 'manual' if manual_clean else 'automated' 在这里根据clean_steps是否为None判断清理是自动清理还是手动清理 如果在ironic.conf中automated_clean为false,且没有指定clean_steps,则直接进入available状态 if not manual_clean and not CONF.conductor.automated_clean: # Skip cleaning, move to AVAILABLE. 
node.clean_step = None node.save() task.process_event('done') LOG.info('Automated cleaning is disabled, node %s has been ' 'successfully moved to AVAILABLE state.', node.uuid) return ironic/drivers/modules/agent.py ----->>> class AgentDeploy(AgentDeployMixin, base.DeployInterface): def prepare_cleaning(self, task): return deploy_utils.prepare_inband_cleaning( task, manage_boot=CONF.agent.manage_agent_boot) 引导ramdisk准备带内清理 ironic/drivers/modules/deploy_utils.py ---->>> def prepare_inband_cleaning(task, manage_boot=True): task.driver.network.add_cleaning_network(task) agent_add_clean_params(task) if manage_boot: ramdisk_opts = build_agent_options(task.node) task.driver.boot.prepare_ramdisk(task, ramdisk_opts) manager_utils.node_power_action(task, states.REBOOT) return states.CLEANWAIT 等待节点启动,ipa发送心跳回来。ipa执行完所有clean步骤后,执行tear_down_cleaning ironic/conductor/manager.py ----->>> def _do_next_clean_step(self, task, step_index): node = task.node manual_clean = node.target_provision_state == states.MANAGEABLE driver_internal_info = node.driver_internal_info if step_index is None: steps = [] else: steps = driver_internal_info['clean_steps'][step_index:] LOG.info('Executing %(state)s on node %(node)s, remaining steps: ' '%(steps)s', {'node': node.uuid, 'steps': steps, 'state': node.provision_state}) ironic/drivers/modules/agent.py ----->>> def tear_down_cleaning(self, task): deploy_utils.tear_down_inband_cleaning( task, manage_boot=CONF.agent.manage_agent_boot) 移除clean network,清理掉ipxe和dhcp相关的文件 ironic/drivers/modules/network/neutron.py --->>> def remove_cleaning_network(self, task): neutron.remove_ports_from_network( task, self.get_cleaning_network_uuid(task)) for port in task.ports: if 'cleaning_vif_port_id' in port.internal_info: internal_info = port.internal_info del internal_info['cleaning_vif_port_id'] port.internal_info = internal_info port.save()
IPA和conductor的通信部分见另外一篇文章:《ipa和ironic-conductor交互》
IPA加载后通过心跳触发ironic-conductor继续clean动作
ironic/drivers/modules/agent_base_vendor.py --->>>处理ipa返回的心跳 class HeartbeatMixin(object): def heartbeat(self, task, callback_url, agent_version): ...... ...... elif node.provision_state == states.CLEANWAIT: self.continue_cleaning(task) ironic/drivers/modules/agent_base_vendor.py --->>> class AgentDeployMixin(HeartbeatMixin): def continue_cleaning(self, task, **kwargs): ..... elif node.provision_state == states.CLEANWAIT: _notify_conductor_resume_clean(task) ironic/drivers/modules/agent_base_vendor.py --->>> def _notify_conductor_resume_clean(task): uuid = task.node.uuid rpc = rpcapi.ConductorAPI() topic = rpc.get_topic_for(task.node) task.release_resources() rpc.continue_node_clean(task.context, uuid, topic=topic) ironic/conductor/rpcapi.py --->>>向conductor服务发出信号,启动下一个清理动作。 def continue_node_clean(self, context, node_id, topic=None): cctxt = self.client.prepare(topic=topic or self.topic, version='1.27') return cctxt.cast(context, 'continue_node_clean', node_id=node_id) ironic/conductor/manager.py --->>> def continue_node_clean(self, context, node_id): ...... task.spawn_after( self._spawn_worker, self._do_next_clean_step, task, next_step_index) def _do_next_clean_step(self, task, step_index): ...... 
try: result = interface.execute_clean_step(task, step) 如果该清理步骤未完成,则result为CLEANWAIT,该函数返回,等待下次心跳再次调用该函数 if result == states.CLEANWAIT: LOG.info('Clean step %(step)s on node %(node)s being ' 'executed asynchronously, waiting for driver.', {'node': node.uuid, 'step': step}) target_state = states.MANAGEABLE if manual_clean else None task.process_event('wait', target_state=target_state) return 该步骤执行完毕后,会从driver_internal_info中将该清理步骤去掉 node.clean_step = None driver_internal_info['clean_steps'] = None driver_internal_info.pop('clean_step_index', None) node.driver_internal_info = driver_internal_info node.save() 传送给ipa来执行对应的clean command ironic/drivers/modules/deploy_utils.py ---->>> def agent_execute_clean_step(task, step): client = agent_client.AgentClient() ports = objects.Port.list_by_node_id( task.context, task.node.id) result = client.execute_clean_step(step, task.node, ports) ironic/drivers/modules/agent_client.py --->>> class AgentClient(object): def execute_clean_step(self, step, node, ports): params = { 'step': step, 'node': node.as_dict(), 'ports': [port.as_dict() for port in ports], 'clean_version': node.driver_internal_info.get( 'hardware_manager_version') } return self._command(node=node, method='clean.execute_clean_step', params=params) 与ipa API交互 def _command(self, node, method, params, wait=False): url = self._get_command_url(node) body = self._get_command_body(method, params) request_params = { 'wait': str(wait).lower() } try: response = self.session.post(url, params=request_params, data=body)
IPA代码执行clean过程
ironic_python_agent/extensions/clean.py --->>>执行一个清理的步骤 class CleanExtension(base.BaseAgentExtension): def execute_clean_step(self, step, node, ports, clean_version=None,**kwargs): ...... ...... result = hardware.dispatch_to_managers(step['step'], node, ports)调用hardware的方法,按优先级获取硬件清理方法 清理磁盘 ironic_python_agent/hardware.py --->>> class HardwareManager(object): 清除任何保存用户数据的设备 def erase_devices(self, node, ports): erase_results = {} block_devices = self.list_block_devices() for block_device in block_devices: result = dispatch_to_managers( 'erase_block_device', node=node, block_device=block_device) erase_results[block_device.name] = result return erase_results def erase_block_device(self, node, block_device): if self._is_virtual_media_device(block_device): LOG.info("Skipping the erase of virtual media device %s", block_device.name) return try: if self._ata_erase(block_device):判断设备是否支持安全擦除 return ....... if self._shred_block_device(node, block_device): return def _ata_erase(self, block_device):如果支持安全擦除则调用hdparm命令进行擦除 security_lines = self._get_ata_security_lines(block_device) ...... if 'enabled' in security_lines: try: utils.execute('hdparm', '--user-master', 'u', '--security-unlock', 'NULL', block_device.name) security_lines = self._get_ata_security_lines(block_device) def _shred_block_device(self, node, block_device):或者用shred擦除磁盘 info = node.get('driver_internal_info', {}) npasses = info.get('agent_erase_devices_iterations', 1) args = ('shred', '--force') if info.get('agent_erase_devices_zeroize', True): args += ('--zero', ) args += ('--verbose', '--iterations', str(npasses), block_device.name)
但谈何容易。