cinder migrate基础内容-源码分析

 

一、cinder-api服务入口

D:\code-program\cinder-codejuno\api\contrib\admin_actions.py

from cinder import volume
class VolumeAdminController(AdminController):
    """AdminController for Volumes."""
    @wsgi.action('os-migrate_volume')
    def _migrate_volume(self, req, id, body):
        """Migrate a volume to the specified host."""
        context = req.environ['cinder.context']
        self.authorize(context, 'migrate_volume')
        try:
            volume = self._get(context, id)--------根据volume id获取卷对象
        except exception.NotFound:
            raise exc.HTTPNotFound()
        params = body['os-migrate_volume']-----获取request请求体中参数
        try:
            host = params['host']-------卷要迁移到的主机
        except KeyError:
            raise exc.HTTPBadRequest(explanation=_("Must specify 'host'"))
        force_host_copy = params.get('force_host_copy', False)------从请求体中,获取force_host_copy的值,默认情况下,force_host_copy为False,
        if isinstance(force_host_copy, basestring):
            try:
                force_host_copy = strutils.bool_from_string(force_host_copy,-----字符串类型转化为bool类型
                                                            strict=True)
            except ValueError:
                raise exc.HTTPBadRequest(
                    explanation=_("Bad value for 'force_host_copy'"))
        elif not isinstance(force_host_copy, bool):
            raise exc.HTTPBadRequest(
                explanation=_("'force_host_copy' not string or bool"))
        self.volume_api.migrate_volume(context, volume, host, force_host_copy)----调用volume.API类里面的方法 步骤一
        return webob.Response(status_int=202)

对步骤一进行详解
D:\code-program\cinder-codejuno\volume\api.py

class API(base.Base):
    """API for interacting with the volume manager."""

    @wrap_check_policy
    def migrate_volume(self, context, volume, host, force_host_copy):
        """Migrate the volume to the specified host."""

        # We only handle "available" volumes for now
        if volume['status'] not in ['available', 'in-use']:------这卷的状态进行判断,只有available', 'in-use这两种状态有效
            msg = _('Volume status must be available/in-use.')
            LOG.error(msg)
            raise exception.InvalidVolume(reason=msg)

        # Make sure volume is not part of a migration
        if volume['migration_status'] is not None:-----确保卷不是出于迁移的状态
            msg = _("Volume is already part of an active migration")
            raise exception.InvalidVolume(reason=msg)

        # We only handle volumes without snapshots for now-----------判断卷是否有快照,如果有,那么就抛出异常
        snaps = self.db.snapshot_get_all_for_volume(context, volume['id'])
        if snaps:
            msg = _("volume must not have snapshots")
            LOG.error(msg)
            raise exception.InvalidVolume(reason=msg)

        # We only handle non-replicated volumes for now
        rep_status = volume['replication_status']
        if rep_status is not None and rep_status != 'disabled':
            msg = _("Volume must not be replicated.")
            LOG.error(msg)
            raise exception.InvalidVolume(reason=msg)

        cg_id = volume.get('consistencygroup_id', None)
        if cg_id:
            msg = _("Volume must not be part of a consistency group.")
            LOG.error(msg)
            raise exception.InvalidVolume(reason=msg)

        #Make juno volume migration api support icehouse.
        #Default lvm backend pool is LVM_iSCSI,
        # gluster backend poll is GlusterFS.
        if "#" not in host:-----------为了兼容ice版本,对host进行改造
            backend_driver = host.split("@")[1]
            if backend_driver == "lvmdriver":
                host = host + "#LVM_iSCSI"
            elif backend_driver == "GLUSTERFS_DRIVER1":
                host = host + "#GlusterFS"
            else:
                msg = _("Volume host is bad format.")
                raise exception.InvalidVolume(reason=msg)

        # Make sure the host is in the list of available hosts
        elevated = context.elevated()
        topic = CONF.volume_topic
        services = self.db.service_get_all_by_topic(elevated,
                                                    topic,
                                                    disabled=False)
        found = False
        for service in services:
            svc_host = volume_utils.extract_host(host, 'backend')-------对卷要迁移到的主机的service状态,进行判断,是否有效,首先该主机的cinder-volume服务状态是up的
            if utils.service_is_up(service) and service['host'] == svc_host:
                found = True
        if not found:
            msg = (_('No available service named %s') % host)
            LOG.error(msg)
            raise exception.InvalidHost(reason=msg)

        # Make sure the destination host is different than the current one
        if host == volume['host']:-------------卷要迁移到的主机必须与卷目前所在的主机不一样
            msg = _('Destination host must be different than current host')
            LOG.error(msg)
            raise exception.InvalidHost(reason=msg)

        self.update(context, volume, {'migration_status': 'starting'})-----此时更新卷的状态的migration_status:starting

        # Call the scheduler to ensure that the host exists and that it can
        # accept the volume
        volume_type = {}
        volume_type_id = volume['volume_type_id']
        if volume_type_id:
            volume_type = volume_types.get_volume_type(context, volume_type_id)
        request_spec = {'volume_properties': volume,
                        'volume_type': volume_type,
                        'volume_id': volume['id']}
        self.scheduler_rpcapi.migrate_volume_to_host(context,------给exchange为cinder-volume的发送rpc请求--对步骤1.1详解
                                                     CONF.volume_topic,
                                                     volume['id'],
                                                     host,
                                                     force_host_copy,
                                                     request_spec)

对步骤1.1详解

D:\code-program\cinder-codejuno\scheduler\rpcapi.py,给rabbitmq中,exchange为cinder-volume的发送rpc.cast请求

class SchedulerAPI(object):
    '''Client side of the scheduler rpc API.
    def migrate_volume_to_host(self, ctxt, topic, volume_id, host,
                               force_host_copy=False, request_spec=None,
                               filter_properties=None):

        cctxt = self.client.prepare(version='1.3')
        request_spec_p = jsonutils.to_primitive(request_spec)
        return cctxt.cast(ctxt, 'migrate_volume_to_host',
                          topic=topic,
                          volume_id=volume_id,
                          host=host,
                          force_host_copy=force_host_copy,
                          request_spec=request_spec_p,
                          filter_properties=filter_properties)

 二、cinder-scheduler 接受rpc请求,对迁移的主机进行判断 

D:\code-program\cinder-codejuno\scheduler\manager.py
from cinder.volume import rpcapi as volume_rpcapi
# Default scheduler driver to use (string value)
#scheduler_driver=cinder.scheduler.filter_scheduler.FilterScheduler
class SchedulerManager(manager.Manager):
    """Chooses a host to create volumes."""

    def migrate_volume_to_host(self, context, topic, volume_id, host,
                               force_host_copy, request_spec,
                               filter_properties=None):
        """Ensure that the host exists and can accept the volume."""

        def _migrate_volume_set_error(self, context, ex, request_spec):
            volume_state = {'volume_state': {'migration_status': None}}
            self._set_volume_state_and_notify('migrate_volume_to_host',
                                              volume_state,
                                              context, ex, request_spec)

        try:
            tgt_host = self.driver.host_passes_filters(context, host,-----------driver的取值是配置文件中scheduler_driver的取值--对步骤二进行详解
                                                       request_spec,
                                                       filter_properties)
        except exception.NoValidHost as ex:
            _migrate_volume_set_error(self, context, ex, request_spec)
        except Exception as ex:
            with excutils.save_and_reraise_exception():
                _migrate_volume_set_error(self, context, ex, request_spec)
        else:
            volume_ref = db.volume_get(context, volume_id)-----从数据库中,获取卷的信息
            volume_rpcapi.VolumeAPI().migrate_volume(context, volume_ref,-----对步骤三进行详解
                                                     tgt_host,----目标主机
                                                     force_host_copy)

对步骤二进行详解					  
D:\code-program\cinder-codejuno\scheduler\filter_scheduler.py
class FilterScheduler(driver.Scheduler):
    """Scheduler that can be used for filtering and weighing."""
    def host_passes_filters(self, context, host, request_spec,
                            filter_properties):
        """Check if the specified host passes the filters."""
        weighed_hosts = self._get_weighted_candidates(context, request_spec,------根据请求卷的特性,过滤出可用的存储节点
                                                      filter_properties)
        for weighed_host in weighed_hosts:------对过滤出来的可用存储节点中,寻找是否有request请求参数中,携带的host的主机信息,如果有,则返回该主机的状态,否则抛出异常
            host_state = weighed_host.obj
            if host_state.host == host:
                return host_state

        msg = (_('Cannot place volume %(id)s on %(host)s')
               % {'id': request_spec['volume_id'], 'host': host})
        raise exception.NoValidHost(reason=msg)	
		
对步骤三进行详解
给cinder-volume发送rpc.cast请求,进行卷的迁移
D:\code-program\cinder-codejuno\volume\rpcapi.py
class VolumeAPI(object):
    '''Client side of the volume rpc API.
    def migrate_volume(self, ctxt, volume, dest_host, force_host_copy):
        new_host = utils.extract_host(volume['host'])----获取卷所在的host主机
        cctxt = self.client.prepare(server=new_host, version='1.8')----更改rcp.client的环境信息,向卷所在的特定主机host发送rpc 请求
        host_p = {'host': dest_host.host,-----卷迁移的目标主机
                  'capabilities': dest_host.capabilities}
        cctxt.cast(ctxt, 'migrate_volume', volume_id=volume['id'],
                   host=host_p, force_host_copy=force_host_copy)

 三、卷所在的主机的cinder-volume服务接受rpc请求 

# Driver to use for volume creation (string value)
#volume_driver=cinder.volume.drivers.lvm.LVMISCSIDriver
D:\code-program\cinder-codejuno\volume\manager.py
class VolumeManager(manager.SchedulerDependentManager):
    """Manages attachable block storage devices."""
    RPC_API_VERSION = '1.19'
    target = messaging.Target(version=RPC_API_VERSION)

    def migrate_volume(self, ctxt, volume_id, host, force_host_copy=False,
                       new_type_id=None):
        """Migrate the volume to the specified host (called on source host)."""
        try:
            # NOTE(flaper87): Verify the driver is enabled
            # before going forward. The exception will be caught
            # and the migration status updated.
            utils.require_driver_initialized(self.driver)-----后端存储驱动的初始化
        except exception.DriverNotInitialized:
            with excutils.save_and_reraise_exception():
                self.db.volume_update(ctxt, volume_id,
                                      {'migration_status': 'error'})

        volume_ref = self.db.volume_get(ctxt, volume_id)----根据volumeid 获取卷的信息
        model_update = None
        moved = False

        status_update = None
        if volume_ref['status'] == 'retyping':
            status_update = {'status': self._get_original_status(volume_ref)}

        self.db.volume_update(ctxt, volume_ref['id'],
                              {'migration_status': 'migrating'})------更新卷的状态的migration_status:migrating
        if not force_host_copy and new_type_id is None:------默认force_host_copy为假,走这个分支,由后端存储驱动的迁移卷函数完成卷迁移
            try:
                LOG.debug("volume %s: calling driver migrate_volume",
                          volume_ref['id'])
                moved, model_update = self.driver.migrate_volume(ctxt,-----------步骤四详解
                                                                 volume_ref,
                                                                 host)
                if moved:
                    updates = {'host': host['host'],
                               'migration_status': None}
                    if status_update:
                        updates.update(status_update)
                    if model_update:
                        updates.update(model_update)
                    volume_ref = self.db.volume_update(ctxt,
                                                       volume_ref['id'],
                                                       updates)
            except Exception:
                with excutils.save_and_reraise_exception():
                    updates = {'migration_status': None}
                    if status_update:
                        updates.update(status_update)
                    model_update = self.driver.create_export(ctxt, volume_ref)
                    if model_update:
                        updates.update(model_update)
                    self.db.volume_update(ctxt, volume_ref['id'], updates)
        if not moved:----如果force_host_copy为真,走这个分支
            try:
                self._migrate_volume_generic(ctxt, volume_ref, host,----步骤五详解
                                             new_type_id)
            except Exception:
                with excutils.save_and_reraise_exception():
                    updates = {'migration_status': None}
                    if status_update:
                        updates.update(status_update)
                    model_update = self.driver.create_export(ctxt, volume_ref)
                    if model_update:
                        updates.update(model_update)
                    self.db.volume_update(ctxt, volume_ref['id'], updates)

 四、 force_host_copy取值不同,所走分支不同的详解

默认情况下,force_host_copy为假,由后端存储驱动来完成卷的迁移工作,如果force_host_copy为真,那么有cinder-volume服务所在的主机完成,卷的迁移工作

force_host_copy为假的情况

对步骤四进行详解
ceph不支持卷迁移,以lvm卷为前提进行分析
D:\code-program\cinder-codejuno\volume\drivers\lvm.py
class LVMISCSIDriver(LVMVolumeDriver, driver.ISCSIDriver):
  def migrate_volume(self, ctxt, volume, host, thin=False, mirror_count=0):
        """Optimize the migration if the destination is on the same server.

        If the specified host is another back-end on the same server, and
        the volume is not attached, we can do the migration locally without
        going through iSCSI.
        """

        false_ret = (False, None)
        if volume['status'] != 'available':
            return false_ret
        if 'location_info' not in host['capabilities']:
            return false_ret
        info = host['capabilities']['location_info']
        try:
            (dest_type, dest_hostname, dest_vg, lvm_type, lvm_mirrors) =\
                info.split(':')
            lvm_mirrors = int(lvm_mirrors)
        except ValueError:
            return false_ret
        if (dest_type != 'LVMVolumeDriver' or dest_hostname != self.hostname):
            return false_ret

        if dest_vg != self.vg.vg_name:
            vg_list = volutils.get_all_volume_groups()
            try:
                (vg for vg in vg_list if vg['name'] == dest_vg).next()
            except StopIteration:
                message = (_("Destination Volume Group %s does not exist") %
                           dest_vg)
                LOG.error(message)
                return false_ret

            helper = utils.get_root_helper()
            dest_vg_ref = lvm.LVM(dest_vg, helper,
                                  lvm_type=lvm_type,
                                  executor=self._execute)
            self.remove_export(ctxt, volume)
            self._create_volume(volume['name'],
                                self._sizestr(volume['size']),
                                lvm_type,
                                lvm_mirrors,
                                dest_vg_ref)

        volutils.copy_volume(self.local_path(volume),
                             self.local_path(volume, vg=dest_vg),
                             volume['size'],
                             self.configuration.volume_dd_blocksize,
                             execute=self._execute)
        self._delete_volume(volume)
        model_update = self._create_export(ctxt, volume, vg=dest_vg)

        return (True, model_update)
如果迁移卷的dest_vg,与该节点配置的vg相同,那么就在该vg上对源卷进行一个拷贝,删除源卷的动作
如果迁移卷的dest_vg,与该节点配置的vg不相同,那么判断dest_vg有效的情况下,移除源卷的export,在dest_vg上创建一个新卷,拷贝数据到目标卷,删除源卷

force_host_copy为真的情况,在这种情况下,是把目的卷,挂载到源卷所在的存储节点上,然后执行linux dd的命令,进行数据的拷贝,因此,拷贝数据的时间长短,完全取决于卷的大小,及存储节点物理主机的cpu及io性能,所以这种情况下,会导致一种情况是,因为数据量太大,数据dd拷贝的时间太长,导致迁移失败的问题

对步骤五进行详解
# Timeout for creating the volume to migrate to when
# performing volume migration (seconds) (integer value)
#migration_create_volume_timeout_secs=300
D:\code-program\cinder-codejuno\volume\manager.py
class VolumeManager(manager.SchedulerDependentManager):
    def _migrate_volume_generic(self, ctxt, volume, host, new_type_id):
        rpcapi = volume_rpcapi.VolumeAPI()

        # Create new volume on remote host
        new_vol_values = {}
        for k, v in volume.iteritems():
            new_vol_values[k] = v
        del new_vol_values['id']
        del new_vol_values['_name_id']
        # We don't copy volume_type because the db sets that according to
        # volume_type_id, which we do copy
        del new_vol_values['volume_type']
        if new_type_id:
            new_vol_values['volume_type_id'] = new_type_id
        new_vol_values['host'] = host['host']
        new_vol_values['status'] = 'creating'
        new_vol_values['migration_status'] = 'target:%s' % volume['id']
        new_vol_values['attach_status'] = 'detached'
        new_volume = self.db.volume_create(ctxt, new_vol_values)
        rpcapi.create_volume(ctxt, new_volume, host['host'],
                             None, None, allow_reschedule=False)
        #以上部分,在数据库中新增一条卷的记录,同时在目的存储节点上,创建一个指定存储类型的卷
        # Wait for new_volume to become ready
		在指定的时间段内,检查目标存储节点上的卷,状态是否正常
        starttime = time.time()
        deadline = starttime + CONF.migration_create_volume_timeout_secs
        new_volume = self.db.volume_get(ctxt, new_volume['id'])
        tries = 0
        while new_volume['status'] != 'available':
            tries = tries + 1
            now = time.time()
            if new_volume['status'] == 'error':
                msg = _("failed to create new_volume on destination host")
                raise exception.VolumeMigrationFailed(reason=msg)
            elif now > deadline:
                msg = _("timeout creating new_volume on destination host")
                raise exception.VolumeMigrationFailed(reason=msg)
            else:
                time.sleep(tries ** 2)
            new_volume = self.db.volume_get(ctxt, new_volume['id'])

        # Copy the source volume to the destination volume
        try:
            if (volume['instance_uuid'] is None and
                    volume['attached_host'] is None):
                self.driver.copy_volume_data(ctxt, volume, new_volume,---这里的driver为 volume_driver 的值,对步骤六详解
                                             remote='dest')
                # The above call is synchronous so we complete the migration
                self.migrate_volume_completion(ctxt, volume['id'],-----完成迁移的后续工作,删除源卷,更新数据库状态
                                               new_volume['id'], error=False)
            else:
                nova_api = compute.API()
                # This is an async call to Nova, which will call the completion
                # when it's done
                nova_api.update_server_volume(ctxt, volume['instance_uuid'],
                                              volume['id'], new_volume['id'])
        except Exception:
            with excutils.save_and_reraise_exception():
                msg = _("Failed to copy volume %(vol1)s to %(vol2)s")
                LOG.error(msg % {'vol1': volume['id'],
                                 'vol2': new_volume['id']})
                volume = self.db.volume_get(ctxt, volume['id'])
                # If we're in the completing phase don't delete the target
                # because we may have already deleted the source!
                if volume['migration_status'] == 'migrating':
                    rpcapi.delete_volume(ctxt, new_volume)
                new_volume['migration_status'] = None
对步骤六进行详解
D:\code-program\cinder-codejuno\volume\driver.py	
 self.driver的取值为volume_driver的值,该方法调用的是下面父类里面的方法
核心功能是把目的节点上的卷,挂载到源卷的节点上,进行Linux dd方式的数据拷贝,在数据考完完成以后,卸载目的卷
class VolumeDriver(object):

    def copy_volume_data(self, context, src_vol, dest_vol, remote=None):
        """Copy data from src_vol to dest_vol."""
        LOG.debug(('copy_data_between_volumes %(src)s -> %(dest)s.')
                  % {'src': src_vol['name'], 'dest': dest_vol['name']})

        properties = utils.brick_get_connector_properties()
        dest_remote = True if remote in ['dest', 'both'] else False
        dest_orig_status = dest_vol['status']
        try:
            dest_attach_info = self._attach_volume(context,
                                                   dest_vol,
                                                   properties,
                                                   remote=dest_remote)
        except Exception:
            with excutils.save_and_reraise_exception():
                msg = _("Failed to attach volume %(vol)s")
                LOG.error(msg % {'vol': dest_vol['id']})
                self.db.volume_update(context, dest_vol['id'],
                                      {'status': dest_orig_status})

        src_remote = True if remote in ['src', 'both'] else False
        src_orig_status = src_vol['status']
        try:
            src_attach_info = self._attach_volume(context,
                                                  src_vol,
                                                  properties,
                                                  remote=src_remote)
        except Exception:
            with excutils.save_and_reraise_exception():
                msg = _("Failed to attach volume %(vol)s")
                LOG.error(msg % {'vol': src_vol['id']})
                self.db.volume_update(context, src_vol['id'],
                                      {'status': src_orig_status})
                self._detach_volume(context, dest_attach_info, dest_vol,
                                    properties, force=True, remote=dest_remote)

        copy_error = True
        mode = self.HOST_BASED
        key_values = {}
        try:
            size_in_mb = int(src_vol['size']) * 1024    # vol size is in GB
            src_device_path = src_attach_info['device']['path']
            dest_device_path = dest_attach_info['device']['path']
            if (not isinstance(src_device_path, six.string_types) or
                    not isinstance(dest_device_path, six.string_types)):
                mode = self.FILE_BASED
            key_values = {src_vol['id']: mode,
                          src_vol['id'] + 'previous_progress': '0'}
            if mode == self.HOST_BASED:
                key_values[src_vol['id'] + 'source'] = src_device_path
                key_values[src_vol['id'] + 'dest'] = dest_device_path
                key_values[src_vol['id'] + 'pid'] = None
            else:
                key_values[src_vol['id'] + 'dest_handle'] = None
            self._add_migration_info_key(key_values)

            volume_utils.copy_volume(
                src_device_path,
                dest_device_path,
                size_in_mb,
                self.configuration.volume_dd_blocksize)
            copy_error = False
        except Exception:
            with excutils.save_and_reraise_exception():
                msg = _("Failed to copy volume %(src)s to %(dest)s.")
                LOG.error(msg % {'src': src_vol['id'], 'dest': dest_vol['id']})
        finally:
            self._detach_volume(context, dest_attach_info, dest_vol,
                                properties, force=copy_error,
                                remote=dest_remote)
            self._detach_volume(context, src_attach_info, src_vol,
                                properties, force=copy_error,
                                remote=src_remote)

  

 

posted @ 2020-04-07 22:58  一切都是当下  阅读(615)  评论(0编辑  收藏  举报