SPDK线程模型

(二)reactor框架: 上层APP协议,与reactor框架的交互
(2.1)SPDK的主线程
SPDK(APP)在启动时会指定绑定在哪些core上运行,这样在每个core上都会创建一个线程(它叫reactor),这个线程不停地做polling操作。如果你要在这个线程上做事情,则需要注册poller(可以理解为:一个poller就是SPDK中一件事情的thread入口函数),这个线程就会不停地调用poller的回调函数,执行你要执行的动作。


spdk_reactors_start 
   reactor_run
     //SPDK中断模式:略
     reactor_interrupt_run 
     
     //SPDK轮询模式:主循环
     _reactor_run
         //主循环内:Step-1
         event_queue_run_batch
              spdk_ring_dequeue
              spdk_event->fn
         //主循环内:Step-2   
         spdk_thread_poll
              thread_poll   
                  //thread_poll内:Step1
                  msg_queue_run_batch
                       spdk_ring_dequeue
                       spdk_msg->fn

                  //thread_poll内:Step2     
                  thread_execute_poller
                       spdk_poller->fn
                       // 分为 vhost_blk / vhost_scsi 两种
                       vdev_worker
                          process_vq  
                             vhost_vq_avail_ring_get
                             // 分支A:vhost_blk
                             process_blk_task
                                  vhost_user_process_blk_request
                                     virtio_blk_process_request
                                         spdk_bdev_readv
                                         spdk_bdev_writev
                                         blk_request_finish
                             // 分支B:vhost_scsi
                             process_scsi_task
                               task_submit
                                 spdk_scsi_dev_queue_task
                                    scsi_lun_execute_task
                                       bdev_scsi_execute
                                          bdev_scsi_process_block
                                             bdev_scsi_readwrite

                  //thread_poll内:Step3                            
                  thread_execute_timed_poller
                       spdk_poller->fn
 
          //主循环内:Step-3   
          reactor_post_process_lw_thread  
            
                _reactor_schedule_thread
                    spdk_event_allocate
                    spdk_event_call

(三)spdk-bdev框架:与后端设备的交互

(3.1)spdk-bdev框架对上层服务提供的读写API接口

读接口:spdk_bdev_read / spdk_bdev_readv 
写接口:spdk_bdev_write / spdk_bdev_writev
 [bdev_io_submit]
    _bdev_io_submit
      bdev_io_do_submit
        spdk_bdev->fn_table->submit_request
        <A> bdev_nvme_submit_request
              bdev_nvme_readv
                spdk_nvme_ns_cmd_read_with_md
                  nvme_qpair_submit_request
                      nvme_transport_qpair_submit_request
                         spdk_nvme_transport->ops.qpair_submit_request
                          <A-1> nvme_pcie_qpair_submit_request
                          <A-2> nvme_rdma_qpair_submit_request
                          <A-3> nvme_tcp_qpair_submit_request
        <B> bdev_pmem_submit_request
        <C> bdev_virtio_submit_request
        <D> bdev_iscsi_submit_request
        <E> bdev_aio_submit_request

(3.2)spdk-bdev的初始化
bdev子系统的初始化函数:bdev_XXX_initialize

spdk_subsystem->init
bdev_subsystem_initialize
  spdk_bdev_initialize
    bdev_modules_init
      spdk_bdev_module->module_init
        bdev_nvme_library_init
        bdev_virtio_initialize
        bdev_iscsi_initialize
        bdev_aio_initialize
        bdev_pmem_initialize
        bdev_uring_init

(3.3)spdk-bdev的注册函数
spdk_bdev_fn_table的注册函数 XXX_fn_table

spdk_bdev_fn之nvme设备

/*
 * bdev function table for NVMe devices.
 * Binds the generic spdk_bdev_fn_table hooks to their bdev_nvme_* handlers.
 */
static const struct spdk_bdev_fn_table nvmelib_fn_table = {
	/* I/O submission hook */
	.submit_request = bdev_nvme_submit_request,
	/* I/O type capability hook */
	.io_type_supported = bdev_nvme_io_type_supported,
	/* I/O channel lookup hook */
	.get_io_channel = bdev_nvme_get_io_channel,
	/* JSON configuration writer hook */
	.write_config_json = bdev_nvme_write_config_json,
};

spdk_bdev_fn之pmem设备

/*
 * bdev function table for pmem devices.
 * Binds the generic spdk_bdev_fn_table hooks to their bdev_pmem_* handlers.
 */
static const struct spdk_bdev_fn_table pmem_fn_table = {
	/* I/O submission hook */
	.submit_request = bdev_pmem_submit_request,
	/* I/O type capability hook */
	.io_type_supported = bdev_pmem_io_type_supported,
	/* I/O channel lookup hook */
	.get_io_channel = bdev_pmem_get_io_channel,
	/* JSON configuration writer hook */
	.write_config_json = bdev_pmem_write_config_json,
};

spdk_bdev_fn之virtio设备

/*
 * bdev function table for virtio devices.
 * Binds the generic spdk_bdev_fn_table hooks to their bdev_virtio_* handlers.
 */
static const struct spdk_bdev_fn_table virtio_fn_table = {
	/* I/O submission hook */
	.submit_request = bdev_virtio_submit_request,
	/* I/O type capability hook */
	.io_type_supported = bdev_virtio_io_type_supported,
	/* I/O channel lookup hook */
	.get_io_channel = bdev_virtio_get_io_channel,
	/* JSON configuration writer hook */
	.write_config_json = bdev_virtio_write_config_json,
};

(三)bdev框架中-nvme设备的实现

(3.1)NVME的初始化:
(a)bdev-nvme的子系统初始化:

# spdk_bdev_module->module_init
bdev_nvme_library_init
  bdev_nvme_create_poll_group_cb
    spdk_nvme_poll_group_create
    SPDK_POLLER_REGISTER(bdev_nvme_poll) 
        # 方向:从nvme设备取出应答数据,回复给bdev
        spdk_nvme_poll_group_process_completions
          nvme_transport_poll_group_process_completions
            spdk_nvme_transport_poll_group->transport->ops.poll_group_process_completions
            <A> nvme_pcie_poll_group_process_completions
            <B> nvme_rdma_poll_group_process_completions
            <C> nvme_tcp_poll_group_process_completions

(a)bdev-nvme的attach

rpc_bdev_nvme_attach_controller
  bdev_nvme_create
     spdk_nvme_connect_async
         nvme_probe_internal
             nvme_transport_ctrlr_scan
             spdk_nvme_probe_ctx->attach_cb 
             connect_attach_cb
                 nvme_ctrlr_create
                    SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq)
     SPDK_POLLER_REGISTER(bdev_nvme_async_poll)  

(b)bdev-nvme的probe

spdk_nvme_probe
> spdk_nvme_probe_async
>> nvme_probe_internal
>>> nvme_transport_ctrlr_scan
>>>> spdk_nvme_transport->ops.ctrlr_scan
>>>> nvme_pcie_ctrlr_scan
>>>> nvme_fabric_ctrlr_scan

NVMe-over-RDMA扫描发现

nvme_fabric_ctrlr_scan
  nvme_ctrlr_cmd_identify
     nvme_ctrlr_submit_admin_request
        nvme_qpair_submit_request        
  nvme_fabric_ctrlr_discover
     nvme_fabric_discover_probe
        nvme_ctrlr_probe
           spdk_nvme_probe_ctx->attach_cb
           nvme_transport_ctrlr_construct

(c)然后是给这个NVME盘创建一个io_qpair
这一步是给这个controller创建一个IO qpair(admin qpair在创建controller的时候就已经创建了),
也就是创建submission-queue和completion-queue(一般是一个sq对应一个cq)

spdk_nvme_ctrlr_alloc_io_qpair
  nvme_ctrlr_create_io_qpair
    spdk_nvme_ctrlr_connect_io_qpair
      nvme_transport_ctrlr_connect_qpair
        nvme_poll_group_connect_qpair
          nvme_transport_poll_group_connect_qpair
            spdk_nvme_transport_poll_group->transport->ops.poll_group_connect_qpair
            <A> nvme_pcie_poll_group_connect_qpair
            <B> nvme_rdma_poll_group_connect_qpair
            <C> nvme_tcp_poll_group_connect_qpair

(3.2)bdev框架对NVME盘的具体实现

bdev_io_do_submit
  spdk_bdev->fn_table->submit_request
  bdev_nvme_submit_request
    bdev_nvme_readv
      spdk_nvme_ns_cmd_read_with_md
        nvme_qpair_submit_request
           nvme_transport_qpair_submit_request
              spdk_nvme_transport->ops.qpair_submit_request
              <A> nvme_pcie_qpair_submit_request
              <B> nvme_rdma_qpair_submit_request
              <C> nvme_tcp_qpair_submit_request

      bdev_nvme_readv_done
        bdev_nvme_io_complete_nvme_status
          spdk_bdev_io_complete_nvme_status    
            spdk_bdev_io_complete
              bdev_io_complete
                spdk_bdev_io->internal.cb
                // spdk_bdev_io_completion_cb

(3.3)读写请求-PCIe接口

nvme_transport_qpair_submit_request
   spdk_nvme_transport->ops.qpair_submit_request
   nvme_pcie_qpair_submit_request
       nvme_pcie_qpair_build_metadata
       nvme_pcie_qpair_submit_tracker
           // Copy the command from the submit-tracker to submission-queue
           nvme_pcie_copy_command
           nvme_pcie_qpair_ring_sq_doorbell

(3.4)读写请求-RDMA接口

nvme_transport_qpair_submit_request
   spdk_nvme_transport->ops.qpair_submit_request
   nvme_rdma_qpair_submit_request
       nvme_rdma_req_init
           nvme_rdma_build_sgl_request
       nvme_rdma_qpair_queue_send_wr
           spdk_rdma_qp_queue_send_wrs
               ibv_wr_send
               ibv_wr_send_inv
               ibv_wr_rdma_read
               ibv_wr_rdma_write
               ibv_wr_set_sge_list
           nvme_rdma_qpair_submit_sends
               spdk_rdma_qp_flush_send_wrs
                   ibv_wr_complete

(6)代码解析:HotPlug

rpc_bdev_nvme_set_hotplug
  bdev_nvme_set_hotplug
    set_nvme_hotplug_period_cb 
      bdev_nvme_hotplug 
        spdk_nvme_probe_async
          nvme_probe_internal
            nvme_transport_ctrlr_scan
  rpc_bdev_nvme_set_hotplug_done

(四)bdev框架中-virtio设备的实现

(4.4)bdev框架下virtio的实现

rpc_bdev_virtio_attach_controller
   <A> bdev_virtio_pci_blk_dev_create
   <B> bdev_virtio_pci_scsi_dev_create

virtio_blk的实现

rpc_bdev_virtio_attach_controller
   bdev_virtio_pci_blk_dev_create
     bdev_virtio_pci_blk_dev_create_cb
       virtio_pci_blk_dev_create
         virtio_pci_dev_init
         virtio_blk_dev_init
           virtio_dev_start
           bdev_virtio_blk_ch_create_cb
              <注册> bdev_virtio_poll

virtio_scsi的实现

rpc_bdev_virtio_attach_controller
   bdev_virtio_pci_scsi_dev_create
     bdev_virtio_pci_scsi_dev_create_cb
       virtio_pci_scsi_dev_create
          virtio_pci_dev_init
          virtio_scsi_dev_init
             virtio_dev_start
             bdev_virtio_scsi_ch_create_cb
               <注册>bdev_virtio_poll

(3)读写请求
一个读写请求,发到qpair的submission queue中,这个submission queue是一个环形队列,此时这个环形队列的tail指针++,然后再把这个tail位置通过门铃寄存器(每个qpair一个门铃寄存器)告诉下面的盘(controller)。controller根据门铃寄存器记录的环形队列尾部位置,知道数据最多可以取到哪里。环形队列的head指针controller也是知道的。
而且每处理一个请求,这个head指针++;这个head位置是由controller来维护的,但是这个位置会记录在completion queue的entry中。
每处理完一个请求,controller会产生一个完成请求entry,写入completion queue的tail位置并更新tail,然后通过中断告诉host可以取完成请求了。
主机host通过head位置访问completion queue的那个entry。

(6)代码解析:NVMe-oF Target专题
nvmf的上层模型

spdk_thread_poll
  thread_execute_poller
    nvmf_poll_group_poll
      nvmf_transport_poll_group_poll
        spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
          <A> nvmf_rdma_poll_group_poll
          <B> nvmf_tcp_poll_group_poll
          <C> nvmf_vfio_user_poll_group_poll

nvmf的rdma实现

nvmf_transport_poll_group_poll
  spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
  nvmf_rdma_poll_group_poll
    nvmf_rdma_poller_poll
      nvmf_rdma_qpair_process_pending
        nvmf_rdma_request_process
          spdk_nvmf_request_exec

nvmf的tcp实现

nvmf_transport_poll_group_poll
  spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
  nvmf_tcp_poll_group_poll
    spdk_sock_group_poll
      spdk_sock_group_poll_count
        sock_group_impl_poll_count
           spdk_sock_group_impl->net_impl->group_impl_poll
           uring_sock_group_impl_poll
              io_uring_submit

nvmf的vfio实现

nvmf_transport_poll_group_poll
  spdk_nvmf_transport_poll_group->transport->ops->poll_group_poll
  nvmf_vfio_user_poll_group_poll
    handle_sq_tdbl_write
      consume_cmd
        handle_cmd_req
          spdk_nvmf_request_exec
            nvmf_ctrlr_process_io_cmd
              nvmf_bdev_ctrlr_read_cmd
                 spdk_bdev_readv_blocks
                   bdev_readv_blocks_with_md
                     bdev_io_submit
              nvmf_bdev_ctrlr_write_cmd
                 spdk_bdev_writev_blocks
                   bdev_writev_blocks_with_md
                     bdev_io_submit
                    

(6)代码解析:blob专题
blob的运行环境

blobfs => blob => bdev
rocksdb => blob => bdev

blob向上提供的读接口:

spdk_file_read
  spdk_file->fs->send_request
    spdk_blob_io_read
      blob_request_submit_op
        blob_request_submit_op_single
          bs_batch_read_dev
            blob_bdev->bs_dev.read
              bdev_blob_read
                 spdk_bdev_read_blocks

blob向上提供的写接口:

spdk_file_write
  spdk_file->fs->send_request
    spdk_blob_io_write 
      blob_request_submit_op
        blob_request_submit_op_single
          bs_batch_write_dev
            blob_bdev->bs_dev.write
            bdev_blob_write
              spdk_bdev_write_blocks

posted @ 2022-02-05 22:13  乌鸦嘴-raven  阅读(1364)  评论(3)  编辑  收藏  举报