当 node 节点的 provision state 为 manageable 时,可执行 inspect:
ironic node-set-provision-state <node_uuid> manage
ironic node-set-provision-state <node_uuid> inspect
inspect阶段
执行 inspect 后,ironic 客户端会发送 PUT 请求到 /v1/nodes/{node_ident}/states/provision,ironic-api 收到这个请求后会根据 body 中的 target 字段做处理:
class NodeStatesController(rest.RestController):
    # Excerpt of the ironic-api REST controller that handles node state
    # requests. Dotted lines ("......") mark code omitted by the article.

    # Custom actions exposed by this controller; each one is mapped to the
    # PUT verb only.
    _custom_actions = {
        'power': ['PUT'],
        'provision': ['PUT'],
        'raid': ['PUT'],
    }

    def get(self, node_ident):
        .......

    def raid(self, node_ident, target_raid_config):
        ......

    def power(self, node_ident, target, timeout=None):
        ......

    def provision(self, node_ident, target, configdrive=None,
                  clean_steps=None, rescue_password=None):
        # Dispatch on the requested provision target.
        # NOTE(review): rpc_node and topic are not defined in this excerpt;
        # presumably they are resolved from node_ident in omitted code.
        if target in (ir_states.ACTIVE, ir_states.REBUILD):
            # Deploy path: rebuild is True only when the target is REBUILD.
            rebuild = (target == ir_states.REBUILD)
            pecan.request.rpcapi.do_node_deploy(context=pecan.request.context,
                                                node_id=rpc_node.uuid,
                                                rebuild=rebuild,
                                                configdrive=configdrive,
                                                topic=topic)
        elif target == ir_states.VERBS['inspect']:
            # Inspect path: call inspect_hardware through the RPC API.
            pecan.request.rpcapi.inspect_hardware(pecan.request.context,
                                                  rpc_node.uuid, topic=topic)
然后 ironic 通过发送 HTTP 请求调用 ironic-inspector。inspect 的具体实现与 driver 有关,位于 driver.inspect.inspect_hardware 中:
def inspect_hardware(self, task):
    # Inspect hardware to obtain the node's hardware properties.
    # _start_inspection is scheduled through eventlet.spawn_n with the node
    # UUID and request context, and states.INSPECTING is returned.
    eventlet.spawn_n(_start_inspection, task.node.uuid, task.context)
    return states.INSPECTING


def _start_inspection(node_uuid, context):
    # NOTE(review): the except clause belonging to this try is not part of
    # the excerpt.
    try:
        # Obtain a client from _get_client() and ask it to introspect the
        # node. Per the original note, _get_client() authenticates against
        # the inspector through keystone.
        _get_client(context).introspect(node_uuid)


def _get_client(context):
    .......
    # Return the ironic-inspector client (ClientV1).
    # NOTE(review): session and inspector_url come from omitted code.
    return client.ClientV1(api_version=INSPECTOR_API_VERSION,
                           session=session, inspector_url=inspector_url)
class ClientV1(http.BaseClient):
    # Version 1 of the client.
    .......

    # Create a client.
    def __init__(self, **kwargs):
        # Use DEFAULT_API_VERSION when the caller did not pass api_version.
        kwargs.setdefault('api_version', DEFAULT_API_VERSION)
        super(ClientV1, self).__init__(**kwargs)
        # Introspection rules accessor, built on this client's request().
        self.rules = RulesAPI(self.request)

    # Start introspection for a node.
    def introspect(self, uuid):
        # Reject anything that is not a string before issuing the request.
        if not isinstance(uuid, six.string_types):
            raise TypeError(
                _("Expected string for uuid argument, got %r") % uuid)
        # POST to /introspection/<uuid>.
        self.request('post', '/introspection/%s' % uuid)
inspector处理阶段
@api('/v1/introspection/<node_id>', rule="introspection:{}",
     verb_to_rule_map={'GET': 'status', 'POST': 'start'},
     methods=['GET', 'POST'])
def api_introspection(node_id):
    # Handler for /v1/introspection/<node_id>; accepts GET and POST.
    if flask.request.method == 'POST':
        # POST: call introspect.introspect() to start hardware introspection
        # for the node, forwarding the X-Auth-Token request header, then
        # reply with an empty body and status 202 (request accepted).
        introspect.introspect(node_id,
                              token=flask.request.headers.get('X-Auth-Token'))
        return '', 202
    else:
        # GET: look the node up in the node cache and return its
        # introspection status as JSON.
        node_info = node_cache.get_node(node_id)
        return flask.json.jsonify(generate_introspection_status(node_info))
def introspect(node_id, token=None):
    # Start hardware introspection for a node.
    # NOTE(review): node, bmc_address and ironic are not defined in this
    # excerpt; presumably they are set up in code the article omitted.
    # Original note: this updates the IPMI information and adds bmc_address
    # to the attributes table.
    node_info = node_cache.start_introspection(node.uuid,
                                               bmc_address=bmc_address,
                                               ironic=ironic)
    # Submit _background_introspect to the executor.
    utils.executor().submit(_background_introspect, node_info, ironic)


def _background_introspect(node_info, ironic):
    ......
    ......
    # Acquire the node lock, then call _background_introspect_locked, which
    # (per the original note) performs the host discovery.
    node_info.acquire_lock()
    _background_introspect_locked(node_info, ironic)


def _background_introspect_locked(node_info, ironic):
    ......
    try:
        # Request a non-persistent PXE boot device for the node.
        ironic.node.set_boot_device(node_info.uuid, 'pxe',
                                    persistent=False)
    except Exception as exc:
        # A failure here is only logged as a warning; execution continues.
        LOG.warning('Failed to set boot device to PXE: %s',
                    exc, node_info=node_info)

    try:
        # Reboot the node.
        ironic.node.set_power_state(node_info.uuid, 'reboot')
    except Exception as exc:
        # Unlike the boot-device step, a power failure is raised as
        # utils.Error.
        raise utils.Error(_('Failed to power on the node, check it\'s '
                            'power management configuration: %s'),
                          exc, node_info=node_info)
ipa阶段
class IronicPythonAgent(base.ExecuteCommandMixin):
    # Excerpt of the agent class; only the beginning of run() is shown.
    ......
    ......

    def run(self):
        # Record the start time and load the hardware managers.
        self.started_at = _time()
        hardware.load_managers()

        # Optionally sleep for the configured hardware initialization delay.
        if self.hardware_initialization_delay > 0:
            LOG.info('Waiting %d seconds before proceeding',
                     self.hardware_initialization_delay)
            time.sleep(self.hardware_initialization_delay)

        if not self.standalone:
            uuid = None
            if cfg.CONF.inspection_callback_url:
                # inspection_callback_url is configured, so run inspection
                # through inspector.inspect() and keep the value it returns.
                uuid = inspector.inspect()
def inspect():
    # Run the configured inspection collectors, send the collected data to
    # the inspector and return the 'uuid' entry of its response. Returns
    # None when inspection is disabled or the inspector response is None.
    ....
    if not CONF.inspection_callback_url:
        LOG.info('Inspection is disabled, skipping')
        return
    # Comma-separated collector names, stripped, with empty entries dropped.
    collector_names = [x.strip() for x in CONF.inspection_collectors.split(',')
                       if x.strip()]
    LOG.info('inspection is enabled with collectors %s', collector_names)

    # Failures are accumulated here and raised later by raise_if_needed().
    failures = utils.AccumulatedFailures(exc_class=errors.InspectionError)
    data = {}

    try:
        # Collector names correspond to the collect_* functions in this
        # file; per the original note the default is "default", i.e.
        # collect_default.
        ext_mgr = extension_manager(collector_names)
        collectors = [(ext.name, ext.plugin) for ext in ext_mgr]
    except Exception as exc:
        # Record the failure and report it to the inspector; the exception
        # is re-raised when the context manager exits.
        with excutils.save_and_reraise_exception():
            failures.add(exc)
            call_inspector(data, failures)

    for name, collector in collectors:
        try:
            # Example: with the default configuration this is
            # collect_default.
            collector(data, failures)
        except Exception as exc:
            # A failing collector is recorded and the loop continues.
            failures.add('collector %s failed: %s', name, exc)

    # call_inspector sends data to the inspector.
    resp = call_inspector(data, failures)

    failures.raise_if_needed()

    if resp is None:
        LOG.info('stopping inspection, as inspector returned an error')
        return

    LOG.info('inspection finished successfully')
    return resp.get('uuid')


def collect_default(data, failures):
    # Default collector: calls list_hardware_info (defined in
    # ironic_python_agent/hardware.py) through the hardware manager
    # dispatcher.
    inventory = hardware.dispatch_to_managers('list_hardware_info')
    # Add the result to data under the 'inventory' key.
    data['inventory'] = inventory
可以看到,ipa 除了 collect_default 之外,还提供了 collect_logs、collect_extra_hardware、collect_pci_devices_info 三个函数,分别用于收集系统日志、收集 benchmark 数据、收集 PCI 设备信息。
在 ironic_python_agent/hardware.py 中可以看到 collect_default 收集了哪些信息:
class HardwareManager(object):
    # Excerpt of the hardware manager class; only list_hardware_info is
    # shown.
    ......
    .....

    def list_hardware_info(self):
        # Build a dict describing the machine, one key per hardware area.
        hardware_info = {}
        hardware_info['interfaces'] = self.list_network_interfaces()  # NICs
        hardware_info['cpu'] = self.get_cpus()  # CPU
        hardware_info['disks'] = self.list_block_devices()  # disks
        hardware_info['memory'] = self.get_memory()  # memory
        hardware_info['bmc_address'] = self.get_bmc_address()  # BMC address
        hardware_info['system_vendor'] = self.get_system_vendor_info()  # system vendor info
        hardware_info['boot'] = self.get_boot_info()  # boot info
        return hardware_info  # return the hardware information
ipa 收集裸机(BM)信息,并将其发送到 ipa-inspection-callback-url 所指定的地址:
@api('/v1/continue', rule="introspection:continue", is_public_api=True,
     methods=['POST'])
def api_continue():
    # Handler for POST /v1/continue, which receives data from the ramdisk.
    # Parse the request body as JSON (force=True).
    data = flask.request.get_json(force=True)
    # Only a JSON object (dict) is accepted.
    if not isinstance(data, dict):
        raise utils.Error(_('Invalid data: expected a JSON object, got %s') %
                          data.__class__.__name__)

    # Copy for logging: values of keys listed in _LOGGING_EXCLUDED_KEYS are
    # replaced with '<hidden>'.
    logged_data = {k: (v if k not in _LOGGING_EXCLUDED_KEYS else '<hidden>')
                   for k, v in data.items()}
    LOG.debug("Received data from the ramdisk: %s", logged_data,
              data=data)

    # Hand the introspection data to process.process() and return its
    # result as JSON.
    return flask.jsonify(process.process(data))
def process(introspection_data):
    # Process introspection data received from the ramdisk.
    # Keep an untouched deep copy for storage.
    unprocessed_data = copy.deepcopy(introspection_data)
    failures = []
    _run_pre_hooks(introspection_data, failures)
    # Original note: the inspector node is looked up by ipmi_address and
    # MACs.
    node_info = _find_node_info(introspection_data, failures)
    if node_info:
        node_info.acquire_lock()
    ......
    # Submit _store_unprocessed_data to the executor to store the data.
    utils.executor().submit(_store_unprocessed_data, node_info,
                            unprocessed_data)

    # NOTE(review): the except clauses of both try blocks below are not part
    # of the excerpt.
    try:
        # Get the node from node_info.
        node = node_info.node()
    .....
    try:
        # Continue in _process_node; per the original note it re-checks the
        # node information.
        result = _process_node(node_info, node, introspection_data)


def _process_node(node_info, node, introspection_data):
    # Check the node's provision state.
    ir_utils.check_provision_state(node)
    _run_post_hooks(node_info, introspection_data)
    # Store the data. Original note: nothing is stored when store_data is
    # "none" in the ironic-inspector configuration.
    _store_data(node_info, introspection_data)

    # Get an ironic client.
    ironic = ir_utils.get_client()
    pxe_filter.driver().sync(ironic)

    # Drop cached information so that it is reloaded next time.
    node_info.invalidate_cache()
    # Apply the introspection rules to the node.
    rules.apply(node_info, introspection_data)

    resp = {'uuid': node.uuid}

    # End of the inspect flow: submit _finish, which handles powering the
    # node off, to the executor.
    utils.executor().submit(_finish, node_info, ironic, introspection_data,
                            power_off=CONF.processing.power_off)

    return resp


def _finish(node_info, ironic, introspection_data, power_off=True):
    # Optionally power the node off, then mark introspection as finished.
    if power_off:
        LOG.debug('Forcing power off of node %s', node_info.uuid)
        try:
            ironic.node.set_power_state(node_info.uuid, 'off')
        except Exception as exc:
            # A power-off failure is ignored for nodes in the 'enroll'
            # provision state; otherwise it is raised as utils.Error.
            if node_info.node().provision_state == 'enroll':
                # NOTE(review): the adjacent literals concatenate to
                # "node in'enroll' state" (no space between the parts).
                LOG.info("Failed to power off the node in"
                         "'enroll' state, ignoring; error was "
                         "%s", exc, node_info=node_info,
                         data=introspection_data)
            else:
                msg = (_('Failed to power off node %(node)s, check '
                         'its power management configuration: '
                         '%(exc)s') % {'node': node_info.uuid, 'exc':
                                       exc})
                raise utils.Error(msg, node_info=node_info,
                                  data=introspection_data)
        LOG.info('Node powered-off', node_info=node_info,
                 data=introspection_data)

    # Record the finish event on the node.
    node_info.finished(istate.Events.finish)
    LOG.info('Introspection finished successfully',
             node_info=node_info, data=introspection_data)
但谈何容易。