OpenStack Nova instance creation process (Part 2)

Part 1 of this series ended its analysis at self.conductor_compute_rpcapi.schedule_and_build_instances() in /nova/conductor/rpcapi.py, shown below:

    def schedule_and_build_instances(self, context, build_requests,
                                     request_specs,
                                     image, admin_password, injected_files,
                                     requested_networks,
                                     block_device_mapping,
                                     tags=None):
        version = '1.17'
        kw = {'build_requests': build_requests,
              'request_specs': request_specs,
              'image': jsonutils.to_primitive(image),
              'admin_password': admin_password,
              'injected_files': injected_files,
              'requested_networks': requested_networks,
              'block_device_mapping': block_device_mapping,
              'tags': tags}

        if not self.client.can_send_version(version):
            version = '1.16'
            del kw['tags']

        cctxt = self.client.prepare(version=version)
        # Send an asynchronous RPC message (cast)
        cctxt.cast(context, 'schedule_and_build_instances', **kw)

It puts the message onto the message bus with cast() and leaves it for nova-conductor to process.

Note: RPC.call() sends a message to the queue and waits for the reply, so it blocks; RPC.cast() sends a message to the queue and continues without waiting for a reply, so it does not block.
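
To make the difference concrete, here is a minimal oslo.messaging sketch; the transport, topic and method names are illustrative, not Nova's actual ones:

    import oslo_messaging as messaging
    from oslo_config import cfg

    # Illustrative only: a generic RPC client, not Nova's conductor/compute API.
    transport = messaging.get_rpc_transport(cfg.CONF)      # e.g. rabbit://...
    target = messaging.Target(topic='demo_topic', version='1.0')
    client = messaging.RPCClient(transport, target)

    ctxt = {}
    # call(): send the request and block until the server returns a result.
    result = client.call(ctxt, 'do_something', arg=1)
    # cast(): send the request and return immediately; no result is awaited.
    client.cast(ctxt, 'do_something_async', arg=2)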

nova-conductor is subscribed to the schedule_and_build_instances message and handles it by calling the function of the same name in /nova/conductor/manager.py, shown below:

    def schedule_and_build_instances(self, context, build_requests,
                                     request_specs, image,
                                     admin_password, injected_files,
                                     requested_networks, block_device_mapping,
                                     tags=None):
        # Add all the UUIDs for the instances
        instance_uuids = [spec.instance_uuid for spec in request_specs]
        try:
            # Returns the list of hosts suitable for building the instances.
            # Call chain:
            # /nova/conductor/manager.py:_schedule_instances() -->
            # /nova/scheduler/client/__init__.py:select_destinations() -->
            # /nova/scheduler/client/query.py:select_destinations() -->
            # /nova/scheduler/rpcapi.py:select_destinations(), which sends a
            # select_destinations request to the message queue using call()
            # and blocks until nova-scheduler returns the suitable hosts.
            host_lists = self._schedule_instances(context, request_specs[0],
                    instance_uuids, return_alternates=True)
        except Exception as exc:
            LOG.exception('Failed to schedule instances')
            self._bury_in_cell0(context, request_specs[0], exc,
                                build_requests=build_requests)
            return

        host_mapping_cache = {}
        cell_mapping_cache = {}
        instances = []

        for (build_request, request_spec, host_list) in six.moves.zip(
                build_requests, request_specs, host_lists):
            instance = build_request.get_new_instance(context)
            # host_list is a list of one or more Selection objects, the first
            # of which has been selected and its resources claimed.
            host = host_list[0]
            # Convert host from the scheduler into a cell record
            if host.service_host not in host_mapping_cache:
                try:
                    host_mapping = objects.HostMapping.get_by_host(
                        context, host.service_host)
                    host_mapping_cache[host.service_host] = host_mapping
                except exception.HostMappingNotFound as exc:
                    LOG.error('No host-to-cell mapping found for selected '
                              'host %(host)s. Setup is incomplete.',
                              {'host': host.service_host})
                    self._bury_in_cell0(context, request_spec, exc,
                                        build_requests=[build_request],
                                        instances=[instance])
                    # This is a placeholder in case the quota recheck fails.
                    instances.append(None)
                    continue
            else:
                host_mapping = host_mapping_cache[host.service_host]

            cell = host_mapping.cell_mapping

            # Before we create the instance, let's make one final check that
            # the build request is still around and wasn't deleted by the user
            # already.
            try:
                objects.BuildRequest.get_by_instance_uuid(
                    context, instance.uuid)
            except exception.BuildRequestNotFound:
                # the build request is gone so we're done for this instance
                LOG.debug('While scheduling instance, the build request '
                          'was already deleted.', instance=instance)
                # This is a placeholder in case the quota recheck fails.
                instances.append(None)
                rc = self.scheduler_client.reportclient
                rc.delete_allocation_for_instance(context, instance.uuid)
                continue
            else:
                instance.availability_zone = (
                    availability_zones.get_host_availability_zone(
                        context, host.service_host))
                with obj_target_cell(instance, cell):
                    instance.create()
                    instances.append(instance)
                    cell_mapping_cache[instance.uuid] = cell

        # NOTE(melwitt): We recheck the quota after creating the
        # objects to prevent users from allocating more resources
        # than their allowed quota in the event of a race. This is
        # configurable because it can be expensive if strict quota
        # limits are not required in a deployment.
        if CONF.quota.recheck_quota:
            try:
                compute_utils.check_num_instances_quota(
                    context, instance.flavor, 0, 0,
                    orig_num_req=len(build_requests))
            except exception.TooManyInstances as exc:
                with excutils.save_and_reraise_exception():
                    self._cleanup_build_artifacts(context, exc, instances,
                                                  build_requests,
                                                  request_specs,
                                                  cell_mapping_cache)

        zipped = six.moves.zip(build_requests, request_specs, host_lists,
                               instances)
        for (build_request, request_spec, host_list, instance) in zipped:
            if instance is None:
                # Skip placeholders that were buried in cell0 or had their
                # build requests deleted by the user before instance create.
                continue
            cell = cell_mapping_cache[instance.uuid]
            # host_list is a list of one or more Selection objects, the first
            # of which has been selected and its resources claimed.
            host = host_list.pop(0)
            alts = [(alt.service_host, alt.nodename) for alt in host_list]
            LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
                      host.service_host, host.nodename, alts,
                      instance=instance)
            filter_props = request_spec.to_legacy_filter_properties_dict()
            scheduler_utils.populate_retry(filter_props, instance.uuid)
            scheduler_utils.populate_filter_properties(filter_props, host)

            # TODO(melwitt): Maybe we should set_target_cell on the contexts
            # once we map to a cell, and remove these separate with statements.
            with obj_target_cell(instance, cell) as cctxt:
                # send a state update notification for the initial create to
                # show it going from non-existent to BUILDING
                # This can lazy-load attributes on instance.
                notifications.send_update_with_states(cctxt, instance, None,
                        vm_states.BUILDING, None, None, service="conductor")
                objects.InstanceAction.action_start(
                    cctxt, instance.uuid, instance_actions.CREATE,
                    want_result=False)
                instance_bdms = self._create_block_device_mapping(
                    cell, instance.flavor, instance.uuid,
                    block_device_mapping)
                instance_tags = self._create_tags(cctxt, instance.uuid, tags)

            # TODO(Kevin Zheng): clean this up once instance.create() handles
            # tags; we do this so the instance.create notification in
            # build_and_run_instance in nova-compute doesn't lazy-load tags
            instance.tags = instance_tags if instance_tags \
                else objects.TagList()

            # Update mapping for instance. Normally this check is guarded by
            # a try/except but if we're here we know that a newer nova-api
            # handled the build process and would have created the mapping
            inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
                context, instance.uuid)
            inst_mapping.cell_mapping = cell
            inst_mapping.save()

            if not self._delete_build_request(
                    context, build_request, instance, cell, instance_bdms,
                    instance_tags):
                # The build request was deleted before/during scheduling so
                # the instance is gone and we don't have anything to build for
                # this one.
                continue

            # NOTE(danms): Compute RPC expects security group names or ids
            # not objects, so convert this to a list of names until we can
            # pass the objects.
            legacy_secgroups = [s.identifier
                                for s in request_spec.security_groups]
            with obj_target_cell(instance, cell) as cctxt:
                self.compute_rpcapi.build_and_run_instance(
                    cctxt, instance=instance, image=image,
                    request_spec=request_spec,
                    filter_properties=filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=legacy_secgroups,
                    block_device_mapping=instance_bdms,
                    host=host.service_host, node=host.nodename,
                    limits=host.limits, host_list=host_list)
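
The host_lists returned by _schedule_instances contains one inner list per requested instance; each inner list holds Selection objects, where the first entry is the chosen host (whose resources have already been claimed) and the remaining entries are alternates kept for a possible reschedule. A simplified illustration of that shape (stand-in objects, not Nova's real ones):

    from collections import namedtuple

    # Stand-in for nova.objects.Selection, for illustration only.
    Selection = namedtuple('Selection', ['service_host', 'nodename', 'limits'])

    host_lists = [
        # one inner list per instance being built
        [Selection('compute-1', 'compute-1', {}),    # chosen host
         Selection('compute-2', 'compute-2', {})],   # alternate for retries
    ]

    for host_list in host_lists:
        host = host_list.pop(0)       # the selected host
        alternates = host_list        # what remains is passed on to nova-compute
        print(host.service_host, [alt.service_host for alt in alternates])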

After that, control moves into /nova/compute/rpcapi.py, which sends a non-blocking message to the queue asking for build_and_run_instance() to be executed, as follows:

    def build_and_run_instance(self, ctxt, instance, host, image, request_spec,
            filter_properties, admin_password=None, injected_files=None,
            requested_networks=None, security_groups=None,
            block_device_mapping=None, node=None, limits=None,
            host_list=None):
        # NOTE(edleafe): compute nodes can only use the dict form of limits.
        if isinstance(limits, objects.SchedulerLimits):
            limits = limits.to_dict()
        kwargs = {"instance": instance,
                  "image": image,
                  "request_spec": request_spec,
                  "filter_properties": filter_properties,
                  "admin_password": admin_password,
                  "injected_files": injected_files,
                  "requested_networks": requested_networks,
                  "security_groups": security_groups,
                  "block_device_mapping": block_device_mapping,
                  "node": node,
                  "limits": limits,
                  "host_list": host_list,
                 }
        client = self.router.client(ctxt)
        version = '4.19'
        if not client.can_send_version(version):
            version = '4.0'
            kwargs.pop("host_list")
        cctxt = client.prepare(server=host, version=version)
        cctxt.cast(ctxt, 'build_and_run_instance', **kwargs)
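
Note that the cast is pinned to one compute host via prepare(server=host). On the receiving side, nova-compute exposes its manager as an RPC endpoint, so the cast is dispatched to the method of the same name on that host. A minimal, generic oslo.messaging server sketch of the pattern (topic, server name and endpoint class are illustrative, not Nova's actual wiring):

    import oslo_messaging as messaging
    from oslo_config import cfg

    class ComputeEndpoint(object):
        """Illustrative endpoint; Nova's real endpoint is its ComputeManager."""
        target = messaging.Target(version='4.19')

        def build_and_run_instance(self, ctxt, instance, image, request_spec,
                                   filter_properties, **kwargs):
            print('would start building %s here' % instance)

    transport = messaging.get_rpc_transport(cfg.CONF)
    target = messaging.Target(topic='compute', server='compute-1')
    server = messaging.get_rpc_server(transport, target, [ComputeEndpoint()])
    server.start()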

nova-compute is subscribed to the build_and_run_instance() message and executes that function in /nova/compute/manager.py, shown below:

    def build_and_run_instance(self, context, instance, image, request_spec,
                     filter_properties, admin_password=None,
                     injected_files=None, requested_networks=None,
                     security_groups=None, block_device_mapping=None,
                     node=None, limits=None, host_list=None):

        @utils.synchronized(instance.uuid)
        def _locked_do_build_and_run_instance(*args, **kwargs):
            # NOTE(danms): We grab the semaphore with the instance uuid
            # locked because we could wait in line to build this instance
            # for a while and we want to make sure that nothing else tries
            # to do anything with this instance while we wait.
            with self._build_semaphore:
                try:
                    result = self._do_build_and_run_instance(*args, **kwargs)
                except Exception:
                    # NOTE(mriedem): This should really only happen if
                    # _decode_files in _do_build_and_run_instance fails, and
                    # that's before a guest is spawned so it's OK to remove
                    # allocations for the instance for this node from Placement
                    # below as there is no guest consuming resources anyway.
                    # The _decode_files case could be handled more specifically
                    # but that's left for another day.
                    result = build_results.FAILED
                    raise
                finally:
                    if result == build_results.FAILED:
                        # Remove the allocation records from Placement for the
                        # instance if the build failed. The instance.host is
                        # likely set to None in _do_build_and_run_instance
                        # which means if the user deletes the instance, it
                        # will be deleted in the API, not the compute service.
                        # Setting the instance.host to None in
                        # _do_build_and_run_instance means that the
                        # ResourceTracker will no longer consider this instance
                        # to be claiming resources against it, so we want to
                        # reflect that same thing in Placement.  No need to
                        # call this for a reschedule, as the allocations will
                        # have already been removed in
                        # self._do_build_and_run_instance().
                        self._delete_allocation_for_instance(context,
                                                             instance.uuid)

                    if result in (build_results.FAILED,
                                  build_results.RESCHEDULED):
                        self._build_failed()
                    else:
                        self._failed_builds = 0

        # NOTE(danms): We spawn here to return the RPC worker thread back to
        # the pool. Since what follows could take a really long time, we don't
        # want to tie up RPC workers.
        utils.spawn_n(_locked_do_build_and_run_instance,
                      context, instance, image, request_spec,
                      filter_properties, admin_password, injected_files,
                      requested_networks, security_groups,
                      block_device_mapping, node, limits, host_list)
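
Two details are worth calling out here: the per-instance lock taken with @utils.synchronized(instance.uuid), and utils.spawn_n, which hands the long-running build to a green thread so the RPC worker can immediately go back to serving other messages. The pattern looks roughly like this (a simplified sketch using eventlet and oslo.concurrency directly, not Nova's utils module):

    import eventlet
    from oslo_concurrency import lockutils

    build_semaphore = eventlet.semaphore.Semaphore(10)   # cap concurrent builds

    def build_and_run_instance(instance_uuid, *args):
        @lockutils.synchronized(instance_uuid)
        def _locked_build():
            with build_semaphore:
                do_build(instance_uuid, *args)           # the long-running work

        # Fire-and-forget greenthread: the RPC worker returns immediately.
        eventlet.spawn_n(_locked_build)

    def do_build(instance_uuid, *args):
        pass    # placeholder for the actual build logic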

This in turn calls _do_build_and_run_instance() in /nova/compute/manager.py, shown below:

    def _do_build_and_run_instance(self, context, instance, image,
            request_spec, filter_properties, admin_password, injected_files,
            requested_networks, security_groups, block_device_mapping,
            node=None, limits=None, host_list=None):

        try:
            LOG.debug('Starting instance...', instance=instance)
            instance.vm_state = vm_states.BUILDING
            instance.task_state = None
            instance.save(expected_task_state=
                    (task_states.SCHEDULING, None))
        except exception.InstanceNotFound:
            msg = 'Instance disappeared before build.'
            LOG.debug(msg, instance=instance)
            return build_results.FAILED
        except exception.UnexpectedTaskStateError as e:
            LOG.debug(e.format_message(), instance=instance)
            return build_results.FAILED

        # b64 decode the files to inject:
        decoded_files = self._decode_files(injected_files)

        if limits is None:
            limits = {}

        if node is None:
            node = self._get_nodename(instance, refresh=True)

        try:
            with timeutils.StopWatch() as timer:
                self._build_and_run_instance(context, instance, image,
                        decoded_files, admin_password, requested_networks,
                        security_groups, block_device_mapping, node, limits,
                        filter_properties, request_spec)
            LOG.info('Took %0.2f seconds to build instance.',
                     timer.elapsed(), instance=instance)
            return build_results.ACTIVE
        except exception.RescheduledException as e:
            retry = filter_properties.get('retry')
            if not retry:
                # no retry information, do not reschedule.
                LOG.debug("Retry info not present, will not reschedule",
                    instance=instance)
                self._cleanup_allocated_networks(context, instance,
                    requested_networks)
                self._cleanup_volumes(context, instance.uuid,
                    block_device_mapping, raise_exc=False)
                compute_utils.add_instance_fault_from_exc(context,
                        instance, e, sys.exc_info(),
                        fault_message=e.kwargs['reason'])
                self._nil_out_instance_obj_host_and_node(instance)
                self._set_instance_obj_error_state(context, instance,
                                                   clean_task_state=True)
                return build_results.FAILED
            LOG.debug(e.format_message(), instance=instance)
            # This will be used for logging the exception
            retry['exc'] = traceback.format_exception(*sys.exc_info())
            # This will be used for setting the instance fault message
            retry['exc_reason'] = e.kwargs['reason']
            # NOTE(comstud): Deallocate networks if the driver wants
            # us to do so.
            # NOTE(vladikr): SR-IOV ports should be deallocated to
            # allow new sriov pci devices to be allocated on a new host.
            # Otherwise, if devices with pci addresses are already allocated
            # on the destination host, the instance will fail to spawn.
            # info_cache.network_info should be present at this stage.
            if (self.driver.deallocate_networks_on_reschedule(instance) or
                self.deallocate_sriov_ports_on_reschedule(instance)):
                self._cleanup_allocated_networks(context, instance,
                        requested_networks)
            else:
                # NOTE(alex_xu): Network already allocated and we don't
                # want to deallocate them before rescheduling. But we need
                # to cleanup those network resources setup on this host before
                # rescheduling.
                self.network_api.cleanup_instance_network_on_host(
                    context, instance, self.host)

            self._nil_out_instance_obj_host_and_node(instance)
            instance.task_state = task_states.SCHEDULING
            instance.save()
            # The instance will have already claimed resources from this host
            # before this build was attempted. Now that it has failed, we need
            # to unclaim those resources before casting to the conductor, so
            # that if there are alternate hosts available for a retry, it can
            # claim resources on that new host for the instance.
            self._delete_allocation_for_instance(context, instance.uuid)

            self.compute_task_api.build_instances(context, [instance],
                    image, filter_properties, admin_password,
                    injected_files, requested_networks, security_groups,
                    block_device_mapping, request_spec=request_spec,
                    host_lists=[host_list])
            return build_results.RESCHEDULED
        except (exception.InstanceNotFound,
                exception.UnexpectedDeletingTaskStateError):
            msg = 'Instance disappeared during build.'
            LOG.debug(msg, instance=instance)
            self._cleanup_allocated_networks(context, instance,
                    requested_networks)
            return build_results.FAILED
        except exception.BuildAbortException as e:
            LOG.exception(e.format_message(), instance=instance)
            self._cleanup_allocated_networks(context, instance,
                    requested_networks)
            self._cleanup_volumes(context, instance.uuid,
                    block_device_mapping, raise_exc=False)
            compute_utils.add_instance_fault_from_exc(context, instance,
                    e, sys.exc_info())
            self._nil_out_instance_obj_host_and_node(instance)
            self._set_instance_obj_error_state(context, instance,
                                               clean_task_state=True)
            return build_results.FAILED
        except Exception as e:
            # Should not reach here.
            LOG.exception('Unexpected build failure, not rescheduling build.',
                          instance=instance)
            self._cleanup_allocated_networks(context, instance,
                    requested_networks)
            self._cleanup_volumes(context, instance.uuid,
                    block_device_mapping, raise_exc=False)
            compute_utils.add_instance_fault_from_exc(context, instance,
                    e, sys.exc_info())
            self._nil_out_instance_obj_host_and_node(instance)
            self._set_instance_obj_error_state(context, instance,
                                               clean_task_state=True)
            return build_results.FAILED
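
Whether the RescheduledException path above actually retries hinges on filter_properties['retry'], which the conductor seeded via scheduler_utils.populate_retry before casting to this host. For orientation, it is roughly a dict of the following shape (an assumption inferred from how this code path reads and writes it, not a verbatim dump):

    # Roughly the shape of the retry info consulted above (assumed, simplified).
    retry = {
        'num_attempts': 1,                        # builds attempted so far
        'hosts': [['compute-1', 'compute-1']],    # (host, node) pairs already tried
        # filled in on failure, for logging and the instance fault record:
        # 'exc': ['...formatted traceback lines...'],
        # 'exc_reason': 'reason from the RescheduledException',
    }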

Still in the same file, _build_and_run_instance() is then called to start spawning the instance on the hypervisor, as follows:

    def _build_and_run_instance(self, context, instance, image, injected_files,
            admin_password, requested_networks, security_groups,
            block_device_mapping, node, limits, filter_properties,
            request_spec=None):

        image_name = image.get('name')
        self._notify_about_instance_usage(context, instance, 'create.start',
                extra_usage_info={'image_name': image_name})
        compute_utils.notify_about_instance_create(
            context, instance, self.host,
            phase=fields.NotificationPhase.START,
            bdms=block_device_mapping)

        # NOTE(mikal): cache the keystone roles associated with the instance
        # at boot time for later reference
        instance.system_metadata.update(
            {'boot_roles': ','.join(context.roles)})

        self._check_device_tagging(requested_networks, block_device_mapping)

        try:
            scheduler_hints = self._get_scheduler_hints(filter_properties,
                                                        request_spec)
            rt = self._get_resource_tracker()
            with rt.instance_claim(context, instance, node, limits):
                # NOTE(russellb) It's important that this validation be done
                # *after* the resource tracker instance claim, as that is where
                # the host is set on the instance.
                self._validate_instance_group_policy(context, instance,
                                                     scheduler_hints)
                image_meta = objects.ImageMeta.from_dict(image)
                with self._build_resources(context, instance,
                        requested_networks, security_groups, image_meta,
                        block_device_mapping) as resources:
                    instance.vm_state = vm_states.BUILDING
                    instance.task_state = task_states.SPAWNING
                    # NOTE(JoshNang) This also saves the changes to the
                    # instance from _allocate_network_async, as they aren't
                    # saved in that function to prevent races.
                    instance.save(expected_task_state=
                            task_states.BLOCK_DEVICE_MAPPING)
                    block_device_info = resources['block_device_info']
                    network_info = resources['network_info']
                    allocs = resources['allocations']
                    LOG.debug('Start spawning the instance on the hypervisor.',
                              instance=instance)
                    with timeutils.StopWatch() as timer:
                        # Spawn the instance on the hypervisor via the virt driver
                        self.driver.spawn(context, instance, image_meta,
                                          injected_files, admin_password,
                                          allocs, network_info=network_info,
                                          block_device_info=block_device_info)
                    LOG.info('Took %0.2f seconds to spawn the instance on '
                             'the hypervisor.', timer.elapsed(),
                             instance=instance)
        except (exception.InstanceNotFound,
                exception.UnexpectedDeletingTaskStateError) as e:
            with excutils.save_and_reraise_exception():
                self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
                compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)
        except exception.ComputeResourcesUnavailable as e:
            LOG.debug(e.format_message(), instance=instance)
            self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
            compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)
            raise exception.RescheduledException(
                    instance_uuid=instance.uuid, reason=e.format_message())
        except exception.BuildAbortException as e:
            with excutils.save_and_reraise_exception():
                LOG.debug(e.format_message(), instance=instance)
                self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
                compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)
        except (exception.FixedIpLimitExceeded,
                exception.NoMoreNetworks, exception.NoMoreFixedIps) as e:
            LOG.warning('No more network or fixed IP to be allocated',
                        instance=instance)
            self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
            compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)
            msg = _('Failed to allocate the network(s) with error %s, '
                    'not rescheduling.') % e.format_message()
            raise exception.BuildAbortException(instance_uuid=instance.uuid,
                    reason=msg)
        except (exception.VirtualInterfaceCreateException,
                exception.VirtualInterfaceMacAddressException,
                exception.FixedIpInvalidOnHost,
                exception.UnableToAutoAllocateNetwork) as e:
            LOG.exception('Failed to allocate network(s)',
                          instance=instance)
            self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
            compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)
            msg = _('Failed to allocate the network(s), not rescheduling.')
            raise exception.BuildAbortException(instance_uuid=instance.uuid,
                    reason=msg)
        except (exception.FlavorDiskTooSmall,
                exception.FlavorMemoryTooSmall,
                exception.ImageNotActive,
                exception.ImageUnacceptable,
                exception.InvalidDiskInfo,
                exception.InvalidDiskFormat,
                cursive_exception.SignatureVerificationError,
                exception.VolumeEncryptionNotSupported,
                exception.InvalidInput) as e:
            self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
            compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)
            raise exception.BuildAbortException(instance_uuid=instance.uuid,
                    reason=e.format_message())
        except Exception as e:
            self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
            compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)
            raise exception.RescheduledException(
                    instance_uuid=instance.uuid, reason=six.text_type(e))

        # NOTE(alaski): This is only useful during reschedules, remove it now.
        instance.system_metadata.pop('network_allocated', None)

        # If CONF.default_access_ip_network_name is set, grab the
        # corresponding network and set the access ip values accordingly.
        network_name = CONF.default_access_ip_network_name
        if (network_name and not instance.access_ip_v4 and
                not instance.access_ip_v6):
            # Note that when there are multiple ips to choose from, an
            # arbitrary one will be chosen.
            for vif in network_info:
                if vif['network']['label'] == network_name:
                    for ip in vif.fixed_ips():
                        if not instance.access_ip_v4 and ip['version'] == 4:
                            instance.access_ip_v4 = ip['address']
                        if not instance.access_ip_v6 and ip['version'] == 6:
                            instance.access_ip_v6 = ip['address']
                    break

        self._update_instance_after_spawn(context, instance)

        try:
            instance.save(expected_task_state=task_states.SPAWNING)
        except (exception.InstanceNotFound,
                exception.UnexpectedDeletingTaskStateError) as e:
            with excutils.save_and_reraise_exception():
                self._notify_about_instance_usage(context, instance,
                    'create.error', fault=e)
                compute_utils.notify_about_instance_create(
                    context, instance, self.host,
                    phase=fields.NotificationPhase.ERROR, exception=e,
                    bdms=block_device_mapping)

        self._update_scheduler_instance_info(context, instance)
        self._notify_about_instance_usage(context, instance, 'create.end',
                extra_usage_info={'message': _('Success')},
                network_info=network_info)
        compute_utils.notify_about_instance_create(context, instance,
                self.host, phase=fields.NotificationPhase.END,
                bdms=block_device_mapping)

Spawning the instance on the hypervisor requires calling the corresponding Python driver for that hypervisor; several drivers can be found under /nova/virt/.

 

These include libvirt, powervm, vmwareapi and xenapi. Since we use libvirt, the spawn() function in /nova/virt/libvirt/driver.py gets called, shown below:
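
Which of these drivers is used is controlled by the compute_driver option in nova.conf (e.g. compute_driver = libvirt.LibvirtDriver). Conceptually the driver name is resolved relative to the nova.virt package and instantiated, along the lines of this simplified sketch (not Nova's actual loader code):

    from oslo_utils import importutils

    def load_compute_driver(virtapi, compute_driver):
        # e.g. "libvirt.LibvirtDriver" -> "nova.virt.libvirt.LibvirtDriver"
        class_path = "nova.virt.%s" % compute_driver
        return importutils.import_object(class_path, virtapi)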

    def spawn(self, context, instance, image_meta, injected_files,
              admin_password, allocations, network_info=None,
              block_device_info=None):
        # Get the disk configuration for the instance
        disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                            instance,
                                            image_meta,
                                            block_device_info)
        injection_info = InjectionInfo(network_info=network_info,
                                       files=injected_files,
                                       admin_pass=admin_password)
        gen_confdrive = functools.partial(self._create_configdrive,
                                          context, instance,
                                          injection_info)
        # Prepare the image backing files
        self._create_image(context, instance, disk_info['mapping'],
                           injection_info=injection_info,
                           block_device_info=block_device_info)

        # Required by Quobyte CI
        self._ensure_console_log_for_instance(instance)

        # Does the guest need to be assigned some vGPU mediated devices ?
        mdevs = self._allocate_mdevs(allocations)

        # Turn the current configuration into the guest's libvirt XML
        xml = self._get_guest_xml(context, instance, network_info,
                                  disk_info, image_meta,
                                  block_device_info=block_device_info,
                                  mdevs=mdevs)
        # Set up networking and create the domain (the instance itself)
        self._create_domain_and_network(
            context, xml, instance, network_info,
            block_device_info=block_device_info,
            post_xml_callback=gen_confdrive,
            destroy_disks_on_failure=True)
        LOG.debug("Instance is running", instance=instance)

        def _wait_for_boot():
            """Called at an interval until the VM is running."""
            state = self.get_info(instance).state

            if state == power_state.RUNNING:
                LOG.info("Instance spawned successfully.", instance=instance)
                raise loopingcall.LoopingCallDone()

        timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
        timer.start(interval=0.5).wait()
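
The _wait_for_boot helper relies on oslo.service's FixedIntervalLoopingCall: the callable runs every interval seconds until it raises LoopingCallDone. The same pattern in isolation:

    from oslo_service import loopingcall

    attempts = {'n': 0}

    def _poll():
        attempts['n'] += 1
        if attempts['n'] >= 3:              # stand-in for "state == RUNNING"
            raise loopingcall.LoopingCallDone()

    timer = loopingcall.FixedIntervalLoopingCall(_poll)
    timer.start(interval=0.5).wait()        # blocks until LoopingCallDone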

The _create_domain_and_network() called in this same file is as follows:

   def _create_domain_and_network(self, context, xml, instance, network_info,
                                   block_device_info=None, power_on=True,
                                   vifs_already_plugged=False,
                                   post_xml_callback=None,
                                   destroy_disks_on_failure=False):

        """Do required network setup and create domain."""
        timeout = CONF.vif_plugging_timeout
        if (self._conn_supports_start_paused and
            utils.is_neutron() and not
            vifs_already_plugged and power_on and timeout):
            events = self._get_neutron_events(network_info)
        else:
            events = []

        pause = bool(events)
        guest = None
        try:
            with self.virtapi.wait_for_instance_event(
                    instance, events, deadline=timeout,
                    error_callback=self._neutron_failed_callback):
                self.plug_vifs(instance, network_info)
                self.firewall_driver.setup_basic_filtering(instance,
                                                           network_info)
                self.firewall_driver.prepare_instance_filter(instance,
                                                             network_info)
                with self._lxc_disk_handler(context, instance,
                                            instance.image_meta,
                                            block_device_info):
                    guest = self._create_domain(
                        xml, pause=pause, power_on=power_on,
                        post_xml_callback=post_xml_callback)

                self.firewall_driver.apply_instance_filter(instance,
                                                           network_info)
        except exception.VirtualInterfaceCreateException:
            # Neutron reported failure and we didn't swallow it, so
            # bail here
            with excutils.save_and_reraise_exception():
                self._cleanup_failed_start(context, instance, network_info,
                                           block_device_info, guest,
                                           destroy_disks_on_failure)
        except eventlet.timeout.Timeout:
            # We never heard from Neutron
            LOG.warning('Timeout waiting for %(events)s for '
                        'instance with vm_state %(vm_state)s and '
                        'task_state %(task_state)s.',
                        {'events': events,
                         'vm_state': instance.vm_state,
                         'task_state': instance.task_state},
                        instance=instance)
            if CONF.vif_plugging_is_fatal:
                self._cleanup_failed_start(context, instance, network_info,
                                           block_device_info, guest,
                                           destroy_disks_on_failure)
                raise exception.VirtualInterfaceCreateException()
        except Exception:
            # Any other error, be sure to clean up
            LOG.error('Failed to start libvirt guest', instance=instance)
            with excutils.save_and_reraise_exception():
                self._cleanup_failed_start(context, instance, network_info,
                                           block_device_info, guest,
                                           destroy_disks_on_failure)

        # Resume only if domain has been paused
        if pause:
            guest.resume()
        return guest
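
A notable detail above is the wait_for_instance_event context manager: when Neutron is in use, the guest is created paused and only resumed once "network-vif-plugged" external events arrive from Neutron, bounded by CONF.vif_plugging_timeout, with CONF.vif_plugging_is_fatal deciding whether a timeout aborts the build. The underlying idea, stripped down (the event names and helper below are illustrative, not Nova's virtapi):

    import eventlet
    from eventlet import queue

    def wait_for_events(expected_events, event_queue, deadline):
        """Block until every expected event arrives or the deadline passes."""
        pending = set(expected_events)
        with eventlet.Timeout(deadline):
            while pending:
                pending.discard(event_queue.get())

    # Usage sketch: events look like ('network-vif-plugged', '<port-id>').
    q = queue.Queue()
    q.put(('network-vif-plugged', 'port-1'))
    wait_for_events([('network-vif-plugged', 'port-1')], q, deadline=300)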

The _create_domain() called in this same file is as follows:

    def _create_domain(self, xml=None, domain=None,
                       power_on=True, pause=False, post_xml_callback=None):
        """Create a domain.

        Either domain or xml must be passed in. If both are passed, then
        the domain definition is overwritten from the xml.

        :returns guest.Guest: Guest just created
        """
        if xml:
            # Define the guest through libvirt from the generated XML
            guest = libvirt_guest.Guest.create(xml, self._host)
            if post_xml_callback is not None:
                post_xml_callback()
        else:
            guest = libvirt_guest.Guest(domain)

        if power_on or pause:
            # Launch (power on) the guest through libvirt
            guest.launch(pause=pause)

        if not utils.is_neutron():
            guest.enable_hairpin()

        return guest
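
Under the hood, Guest.create() and guest.launch() wrap the libvirt Python bindings: the domain is defined from the generated XML and then started. Stripped of Nova's wrappers, the equivalent raw calls look roughly like this (the URI and XML are placeholders, and error handling is omitted):

    import libvirt

    domain_xml = "<domain type='kvm'> ... </domain>"   # placeholder XML
    conn = libvirt.open('qemu:///system')
    dom = conn.defineXML(domain_xml)    # persistently define the guest (create)
    dom.create()                        # power it on (launch)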

Summary: at this point we have traced the basic code path for creating an instance in OpenStack. The OpenStack CLI sends the RESTful request to create an instance --->>> nova-api receives the request and routes it to the controller method (def create()) in /nova/api/openstack/compute/servers.py, which starts the creation flow and eventually reaches self.conductor_compute_rpcapi.schedule_and_build_instances() in /nova/conductor/rpcapi.py, sending an "execute schedule_and_build_instances" request to the RabbitMQ message queue --->>> nova-conductor picks up the RPC request and runs schedule_and_build_instances() in /nova/conductor/manager.py, during which it sends an RPC request to nova-scheduler to obtain host_lists (the hosts suitable for building the instances), and after further processing sends an "execute build_and_run_instance" request to the RabbitMQ queue --->>> the nova-compute service receives that RPC request, runs build_and_run_instance() in /nova/compute/manager.py, and eventually calls self.driver.spawn(), which invokes the hypervisor driver to spawn the instance, finally defining and starting the VM.

 

 
