qemu + dpdk vq->callfd

 

  qemu-kvm segfault in kvm packstack aio + ovs-dpdk

 https://bugzilla.redhat.com/show_bug.cgi?id=1380703

(gdb) bt
#0  0x00007fb6c573c0b8 in kvm_virtio_pci_irqfd_use (proxy=proxy@entry=0x7fb6d17e2000, queue_no=queue_no@entry=0, vector=vector@entry=1) at hw/virtio/virtio-pci.c:498
#1  0x00007fb6c573d2de in virtio_pci_vq_vector_unmask (msg=..., vector=1, queue_no=0, proxy=0x7fb6d17e2000) at hw/virtio/virtio-pci.c:624
#2  virtio_pci_vector_unmask (dev=0x7fb6d17e2000, vector=1, msg=...) at hw/virtio/virtio-pci.c:660
#3  0x00007fb6c570d7ca in msix_set_notifier_for_vector (vector=1, dev=0x7fb6d17e2000) at hw/pci/msix.c:513
#4  msix_set_vector_notifiers (dev=dev@entry=0x7fb6d17e2000, use_notifier=use_notifier@entry=0x7fb6c573d130 <virtio_pci_vector_unmask>, 
    release_notifier=release_notifier@entry=0x7fb6c573d080 <virtio_pci_vector_mask>, poll_notifier=poll_notifier@entry=0x7fb6c573bf40 <virtio_pci_vector_poll>) at hw/pci/msix.c:540
#5  0x00007fb6c573d82d in virtio_pci_set_guest_notifiers (d=0x7fb6d17e2000, nvqs=2, assign=<optimized out>) at hw/virtio/virtio-pci.c:821
#6  0x00007fb6c55ed1c0 in vhost_net_start (dev=dev@entry=0x7fb6d17e9f40, ncs=0x7fb6c8601da0, total_queues=total_queues@entry=1) at /usr/src/debug/qemu-2.3.0/hw/net/vhost_net.c:353
#7  0x00007fb6c55e91e4 in virtio_net_vhost_status (status=<optimized out>, n=0x7fb6d17e9f40) at /usr/src/debug/qemu-2.3.0/hw/net/virtio-net.c:143
#8  virtio_net_set_status (vdev=<optimized out>, status=7 '\a') at /usr/src/debug/qemu-2.3.0/hw/net/virtio-net.c:162
#9  0x00007fb6c55f97dc in virtio_set_status (vdev=vdev@entry=0x7fb6d17e9f40, val=val@entry=7 '\a') at /usr/src/debug/qemu-2.3.0/hw/virtio/virtio.c:609
#10 0x00007fb6c573ca4e in virtio_ioport_write (val=7, addr=18, opaque=0x7fb6d17e2000) at hw/virtio/virtio-pci.c:283
#11 virtio_pci_config_write (opaque=0x7fb6d17e2000, addr=18, val=7, size=<optimized out>) at hw/virtio/virtio-pci.c:409
#12 0x00007fb6c55ca3d7 in memory_region_write_accessor (mr=0x7fb6d17e2880, addr=<optimized out>, value=0x7fb6ac21a338, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
    at /usr/src/debug/qemu-2.3.0/memory.c:457
#13 0x00007fb6c55ca0e9 in access_with_adjusted_size (addr=addr@entry=18, value=value@entry=0x7fb6ac21a338, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, 
    access=access@entry=0x7fb6c55ca380 <memory_region_write_accessor>, mr=mr@entry=0x7fb6d17e2880, attrs=attrs@entry=...) at /usr/src/debug/qemu-2.3.0/memory.c:516
#14 0x00007fb6c55cbb51 in memory_region_dispatch_write (mr=mr@entry=0x7fb6d17e2880, addr=18, data=7, size=1, attrs=...) at /usr/src/debug/qemu-2.3.0/memory.c:1161
#15 0x00007fb6c55976e0 in address_space_rw (as=0x7fb6c5c51cc0 <address_space_io>, addr=49266, attrs=..., buf=buf@entry=0x7fb6ac21a40c "\a\177", len=len@entry=1, is_write=is_write@entry=true)
    at /usr/src/debug/qemu-2.3.0/exec.c:2353
#16 0x00007fb6c559794b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., attrs@entry=..., buf=buf@entry=0x7fb6ac21a40c "\a\177", len=len@entry=1)
    at /usr/src/debug/qemu-2.3.0/exec.c:2415
#17 0x00007fb6c55c3a4c in cpu_outb (addr=<optimized out>, val=7 '\a') at /usr/src/debug/qemu-2.3.0/ioport.c:67
#18 0x00007fb6ad7020b0 in ?? ()
#19 0x00007fb6ac21a500 in ?? ()
#20 0x0000000000000000 in ?? ()

 

 https://bugzilla.redhat.com/show_bug.cgi?id=1410716

Qemu segfault when using TCG acceleration with vhost-user netdev:

#0  0x00005624dd5b2cee in kvm_virtio_pci_irqfd_use (proxy=proxy@entry=0x5624e62f8000, queue_no=queue_no@entry=0, vector=vector@entry=1) at /usr/src/debug/qemu-2.7.0/hw/virtio/virtio-pci.c:735
#1  0x00005624dd5b4556 in virtio_pci_vq_vector_unmask (msg=..., vector=1, queue_no=0, proxy=0x5624e62f8000) at /usr/src/debug/qemu-2.7.0/hw/virtio/virtio-pci.c:860
#2  0x00005624dd5b4556 in virtio_pci_vector_unmask (dev=0x5624e62f8000, vector=1, msg=...) at /usr/src/debug/qemu-2.7.0/hw/virtio/virtio-pci.c:896
#3  0x00005624dd55eb06 in msix_set_notifier_for_vector (vector=1, dev=0x5624e62f8000) at /usr/src/debug/qemu-2.7.0/hw/pci/msix.c:525
#4  0x00005624dd55eb06 in msix_set_vector_notifiers (dev=dev@entry=0x5624e62f8000, use_notifier=use_notifier@entry=0x5624dd5b4320 <virtio_pci_vector_unmask>, release_notifier=release_notifier@entry=0x5624dd5b4280 <virtio_pci_vector_mask>, poll_notifier=poll_notifier@entry=0x5624dd5b2b80 <virtio_pci_vector_poll>) at /usr/src/debug/qemu-2.7.0/hw/pci/msix.c:552
#5  0x00005624dd5b4829 in virtio_pci_set_guest_notifiers (d=0x5624e62f8000, nvqs=2, assign=true) at /usr/src/debug/qemu-2.7.0/hw/virtio/virtio-pci.c:1057
#6  0x00005624dd3e037b in vhost_net_start (dev=dev@entry=0x5624e6300340, ncs=0x5624df8d7f00, total_queues=total_queues@entry=1) at /usr/src/debug/qemu-2.7.0/hw/net/vhost_net.c:317
#7  0x00005624dd3dce73 in virtio_net_vhost_status (status=15 '\017', n=0x5624e6300340) at /usr/src/debug/qemu-2.7.0/hw/net/virtio-net.c:151
#8  0x00005624dd3dce73 in virtio_net_set_status (vdev=<optimized out>, status=<optimized out>) at /usr/src/debug/qemu-2.7.0/hw/net/virtio-net.c:224
#9  0x00005624dd3f08b9 in virtio_set_status (vdev=vdev@entry=0x5624e6300340, val=<optimized out>) at /usr/src/debug/qemu-2.7.0/hw/virtio/virtio.c:765
#10 0x00005624dd5b28c4 in virtio_pci_common_write (opaque=0x5624e62f8000, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at /usr/src/debug/qemu-2.7.0/hw/virtio/virtio-pci.c:1298
#11 0x00005624dd3af086 in memory_region_write_accessor (mr=<optimized out>, addr=<optimized out>, value=<optimized out>, size=<optimized out>, shift=<optimized out>, mask=<optimized out>, attrs=...)
    at /usr/src/debug/qemu-2.7.0/memory.c:525
#12 0x00005624dd3ad97d in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7fb9583fa0d8, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access=
    0x5624dd3af040 <memory_region_write_accessor>, mr=0x5624e62f8988, attrs=...) at /usr/src/debug/qemu-2.7.0/memory.c:591
#13 0x00005624dd3b0dac in memory_region_dispatch_write (mr=<optimized out>, addr=20, data=<optimized out>, size=<optimized out>, attrs=...) at /usr/src/debug/qemu-2.7.0/memory.c:1275
#14 0x00007fb962c730a4 in code_gen_buffer ()
#15 0x00005624dd35f805 in cpu_tb_exec (cpu=cpu@entry=0x5624e51d6000, itb=<optimized out>, itb=<optimized out>) at /usr/src/debug/qemu-2.7.0/cpu-exec.c:166
#16 0x00005624dd374f22 in cpu_loop_exec_tb (sc=0x7fb9583fa640, tb_exit=<synthetic pointer>, last_tb=0x7fb9583fa630, tb=<optimized out>, cpu=0x5624e51d6000) at /usr/src/debug/qemu-2.7.0/cpu-exec.c:530
#17 0x00005624dd374f22 in cpu_exec (cpu=<optimized out>) at /usr/src/debug/qemu-2.7.0/cpu-exec.c:625
#18 0x00005624dd39aa8f in qemu_tcg_cpu_thread_fn (arg=<optimized out>) at /usr/src/debug/qemu-2.7.0/cpus.c:1541
#19 0x00007fb9b8d9b6ca in start_thread (arg=0x7fb9583fd700) at pthread_create.c:333
#20 0x00007fb9b8ad5f6f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:105

 

 

 

Breakpoint 2, vhost_user_set_vring_call (pdev=0xffffbc3ecfa8, msg=0xffffbc3ecd00, main_fd=45) at /data1/dpdk-19.11/lib/librte_vhost/vhost_user.c:1563
1563            struct virtio_net *dev = *pdev;
(gdb) bt
#0  vhost_user_set_vring_call (pdev=0xffffbc3ecfa8, msg=0xffffbc3ecd00, main_fd=45) at /data1/dpdk-19.11/lib/librte_vhost/vhost_user.c:1563
#1  0x0000000000511e24 in vhost_user_msg_handler (vid=0, fd=45) at /data1/dpdk-19.11/lib/librte_vhost/vhost_user.c:2689  ---------- message handling
#2  0x0000000000504eb0 in vhost_user_read_cb (connfd=45, dat=0x15a3de0, remove=0xffffbc3ed064) at /data1/dpdk-19.11/lib/librte_vhost/socket.c:306
#3  0x0000000000502198 in fdset_event_dispatch (arg=0x11f37f8 <vhost_user+8192>) at /data1/dpdk-19.11/lib/librte_vhost/fd_man.c:286
#4  0x00000000005a05a4 in rte_thread_init (arg=0x15a42c0) at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_thread.c:165
#5  0x0000ffffbe617d38 in start_thread (arg=0xffffbc3ed910) at pthread_create.c:309
#6  0x0000ffffbe55f5f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:91
(gdb) 

 

 

 

Breakpoint 1, vhost_user_set_vring_kick (pdev=0xffffbc3ecfa8, msg=0xffffbc3ecd00, main_fd=45) at /data1/dpdk-19.11/lib/librte_vhost/vhost_user.c:1795
1795            struct virtio_net *dev = *pdev;
(gdb) bt
#0  vhost_user_set_vring_kick (pdev=0xffffbc3ecfa8, msg=0xffffbc3ecd00, main_fd=45) at /data1/dpdk-19.11/lib/librte_vhost/vhost_user.c:1795
#1  0x0000000000511e24 in vhost_user_msg_handler (vid=0, fd=45) at /data1/dpdk-19.11/lib/librte_vhost/vhost_user.c:2689  -- message handling
#2  0x0000000000504eb0 in vhost_user_read_cb (connfd=45, dat=0x15a3de0, remove=0xffffbc3ed064) at /data1/dpdk-19.11/lib/librte_vhost/socket.c:306
#3  0x0000000000502198 in fdset_event_dispatch (arg=0x11f37f8 <vhost_user+8192>) at /data1/dpdk-19.11/lib/librte_vhost/fd_man.c:286
#4  0x00000000005a05a4 in rte_thread_init (arg=0x15a42c0) at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_thread.c:165
#5  0x0000ffffbe617d38 in start_thread (arg=0xffffbc3ed910) at pthread_create.c:309
#6  0x0000ffffbe55f5f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:91
(gdb) 

 

 

 

 

 

 

rte_vhost_driver_register

int
rte_vhost_driver_register(const char *path, uint64_t flags)
{
    int ret = -1;
    struct vhost_user_socket *vsocket;

    if (!path)
        return -1;

    pthread_mutex_lock(&vhost_user.mutex);

    if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
        RTE_LOG(ERR, VHOST_CONFIG,
            "error: the number of vhost sockets reaches maximum\n");
        goto out;
    }

    vsocket = malloc(sizeof(struct vhost_user_socket));
    if (!vsocket)
        goto out;
    memset(vsocket, 0, sizeof(struct vhost_user_socket));
    vsocket->path = strdup(path);
    if (vsocket->path == NULL) {
        RTE_LOG(ERR, VHOST_CONFIG,
            "error: failed to copy socket path string\n");
        vhost_user_socket_mem_free(vsocket);
        goto out;
    }
    TAILQ_INIT(&vsocket->conn_list);
    ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
    if (ret) {
        RTE_LOG(ERR, VHOST_CONFIG,
            "error: failed to init connection mutex\n");
        goto out_free;
    }
    vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;

    /*
     * Set the supported features correctly for the builtin vhost-user
     * net driver.
     *
     * Applications know nothing about features the builtin virtio net
     * driver (virtio_net.c) supports, thus it's not possible for them
     * to invoke rte_vhost_driver_set_features(). To workaround it, here
     * we set it unconditionally. If the application want to implement
     * another vhost-user driver (say SCSI), it should call the
     * rte_vhost_driver_set_features(), which will overwrite following
     * two values.
     */
    vsocket->use_builtin_virtio_net = true;
    vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
    vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
    vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;

    /*
     * Dequeue zero copy can't assure descriptors returned in order.
     * Also, it requires that the guest memory is populated, which is
     * not compatible with postcopy.
     */
    if (vsocket->dequeue_zero_copy) {
        vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
        vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);

        RTE_LOG(INFO, VHOST_CONFIG,
            "Dequeue zero copy requested, disabling postcopy support\n");
        vsocket->protocol_features &=
            ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
    }

    if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
        vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
        vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
    }

    if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
        vsocket->protocol_features &=
            ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
    } else {
#ifndef RTE_LIBRTE_VHOST_POSTCOPY
        RTE_LOG(ERR, VHOST_CONFIG,
            "Postcopy requested but not compiled\n");
        ret = -1;
        goto out_mutex;
#endif
    }

    if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
        vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
        if (vsocket->reconnect && reconn_tid == 0) {
            if (vhost_user_reconnect_init() != 0)
                goto out_mutex;
        }
    } else {
        vsocket->is_server = true;
    }
    ret = create_unix_socket(vsocket);
    if (ret < 0) {
        goto out_mutex;
    }

    vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;

    pthread_mutex_unlock(&vhost_user.mutex);
    return ret;

out_mutex:
    if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
        RTE_LOG(ERR, VHOST_CONFIG,
            "error: failed to destroy connection mutex\n");
    }
out_free:
    vhost_user_socket_mem_free(vsocket);
out:
    pthread_mutex_unlock(&vhost_user.mutex);

    return ret;
}

 

rte_vhost_driver_start(Socket_files)

 

int
rte_vhost_driver_start(const char *path)
{
    struct vhost_user_socket *vsocket;
    static pthread_t fdset_tid;

    pthread_mutex_lock(&vhost_user.mutex);
    vsocket = find_vhost_user_socket(path);
    pthread_mutex_unlock(&vhost_user.mutex);

    if (!vsocket)
        return -1;

    if (fdset_tid == 0) {
        /**
         * create a pipe which will be waited by poll and notified to
         * rebuild the wait list of poll.
         */
        if (fdset_pipe_init(&vhost_user.fdset) < 0) {
            RTE_LOG(ERR, VHOST_CONFIG,
                "failed to create pipe for vhost fdset\n");
            return -1;
        }

        int ret = rte_ctrl_thread_create(&fdset_tid,
            "vhost-events", NULL, fdset_event_dispatch,
            &vhost_user.fdset);
        if (ret != 0) {
            RTE_LOG(ERR, VHOST_CONFIG,
                "failed to create fdset handling thread");

            fdset_pipe_uninit(&vhost_user.fdset);
            return -1;
        }
    }

    if (vsocket->is_server)
        return vhost_user_start_server(vsocket);
    else
        return vhost_user_start_client(vsocket);
}
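For context, an application typically wires these two calls together with a callback registration. Below is a minimal, hedged sketch; the socket path and the new_device/destroy_device handlers are placeholders, not taken from this post:

#include <stdio.h>
#include <rte_vhost.h>

/* Hypothetical callbacks: invoked by librte_vhost when a frontend
 * (e.g. QEMU) connects and the device becomes ready, or goes away. */
static int new_device_cb(int vid)
{
    printf("vhost device %d is ready\n", vid);
    return 0;
}

static void destroy_device_cb(int vid)
{
    printf("vhost device %d removed\n", vid);
}

static const struct vhost_device_ops ops = {
    .new_device     = new_device_cb,
    .destroy_device = destroy_device_cb,
};

int register_vhost_socket(void)
{
    const char *path = "/tmp/vhost-user.sock";   /* placeholder path */

    /* Create the unix socket in server mode (flags = 0). */
    if (rte_vhost_driver_register(path, 0) != 0)
        return -1;

    /* Hook up the application's notification callbacks. */
    if (rte_vhost_driver_callback_register(path, &ops) != 0)
        return -1;

    /* Spawn the fdset thread and start accepting connections. */
    return rte_vhost_driver_start(path);
}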

 

vhost_user_read_cb -> vhost_user_msg_handler
static void
vhost_user_read_cb(int connfd, void *dat, int *remove)
{
    struct vhost_user_connection *conn = dat;
    struct vhost_user_socket *vsocket = conn->vsocket;
    int ret;

    ret = vhost_user_msg_handler(conn->vid, connfd);
    if (ret < 0) {
        close(connfd);
        *remove = 1;
        vhost_destroy_device(conn->vid);

        if (vsocket->notify_ops->destroy_connection)
            vsocket->notify_ops->destroy_connection(conn->vid);

        pthread_mutex_lock(&vsocket->conn_mutex);
        TAILQ_REMOVE(&vsocket->conn_list, conn, next);
        pthread_mutex_unlock(&vsocket->conn_mutex);

        free(conn);

        if (vsocket->reconnect) {
            create_unix_socket(vsocket);
            vhost_user_start_client(vsocket);
        }
    }
}

 

 

 

int
vhost_user_msg_handler(int vid, int fd)
{
    struct virtio_net *dev;
    struct VhostUserMsg msg;
    struct rte_vdpa_device *vdpa_dev;
    int did = -1;
    int ret;
    int unlock_required = 0;
    uint32_t skip_master = 0;
    int request;

    dev = get_device(vid);
    if (dev == NULL)
        return -1;

    if (!dev->notify_ops) {
        dev->notify_ops = vhost_driver_callback_get(dev->ifname);
        if (!dev->notify_ops) {
            RTE_LOG(ERR, VHOST_CONFIG,
                "failed to get callback ops for driver %s\n",
                dev->ifname);
            return -1;
        }
    }

    ret = read_vhost_message(fd, &msg);
    if (ret <= 0 || msg.request.master >= VHOST_USER_MAX) {
        if (ret < 0)
            RTE_LOG(ERR, VHOST_CONFIG,
                "vhost read message failed\n");
        else if (ret == 0)
            RTE_LOG(INFO, VHOST_CONFIG,
                "vhost peer closed\n");
        else
            RTE_LOG(ERR, VHOST_CONFIG,
                "vhost read incorrect message\n");

        return -1;
    }

    ret = 0;
    if (msg.request.master != VHOST_USER_IOTLB_MSG)
        RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
            vhost_message_str[msg.request.master]);
    else
        RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
            vhost_message_str[msg.request.master]);

    ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
    if (ret < 0) {
        RTE_LOG(ERR, VHOST_CONFIG,
            "failed to alloc queue\n");
        return -1;
    }

    /*
     * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE
     * and VHOST_USER_RESET_OWNER, since it is sent when virtio stops
     * and device is destroyed. destroy_device waits for queues to be
     * inactive, so it is safe. Otherwise taking the access_lock
     * would cause a dead lock.
     */
    switch (msg.request.master) {
    case VHOST_USER_SET_FEATURES:
    case VHOST_USER_SET_PROTOCOL_FEATURES:
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_SET_LOG_BASE:
    case VHOST_USER_SET_LOG_FD:
    case VHOST_USER_SET_VRING_NUM:
    case VHOST_USER_SET_VRING_ADDR:
    case VHOST_USER_SET_VRING_BASE:
    case VHOST_USER_SET_VRING_KICK:
    case VHOST_USER_SET_VRING_CALL:
    case VHOST_USER_SET_VRING_ERR:
    case VHOST_USER_SET_VRING_ENABLE:
    case VHOST_USER_SEND_RARP:
    case VHOST_USER_NET_SET_MTU:
    case VHOST_USER_SET_SLAVE_REQ_FD:
        vhost_user_lock_all_queue_pairs(dev);
        unlock_required = 1;
        break;
    default:
        break;

    }

    if (dev->extern_ops.pre_msg_handle) {
        ret = (*dev->extern_ops.pre_msg_handle)(dev->vid,
                (void *)&msg, &skip_master);
        if (ret == VH_RESULT_ERR)
            goto skip_to_reply;
        else if (ret == VH_RESULT_REPLY)
            send_vhost_reply(fd, &msg);

        if (skip_master)
            goto skip_to_post_handle;
    }

    request = msg.request.master;
    if (request > VHOST_USER_NONE && request < VHOST_USER_MAX) {
        if (!vhost_message_handlers[request])
            goto skip_to_post_handle;
        ret = vhost_message_handlers[request](&dev, &msg, fd);

        switch (ret) {
        case VH_RESULT_ERR:
            RTE_LOG(ERR, VHOST_CONFIG,
                "Processing %s failed.\n",
                vhost_message_str[request]);
            break;
        case VH_RESULT_OK:
            RTE_LOG(DEBUG, VHOST_CONFIG,
                "Processing %s succeeded.\n",
                vhost_message_str[request]);
            break;
        case VH_RESULT_REPLY:
            RTE_LOG(DEBUG, VHOST_CONFIG,
                "Processing %s succeeded and needs reply.\n",
                vhost_message_str[request]);
            send_vhost_reply(fd, &msg);
            break;
        }
    } else {
        RTE_LOG(ERR, VHOST_CONFIG,
            "Requested invalid message type %d.\n", request);
        ret = VH_RESULT_ERR;
    }

skip_to_post_handle:
    if (ret != VH_RESULT_ERR && dev->extern_ops.post_msg_handle) {
        ret = (*dev->extern_ops.post_msg_handle)(
                dev->vid, (void *)&msg);
        if (ret == VH_RESULT_ERR)
            goto skip_to_reply;
        else if (ret == VH_RESULT_REPLY)
            send_vhost_reply(fd, &msg);
    }

skip_to_reply:
    if (unlock_required)
        vhost_user_unlock_all_queue_pairs(dev);

    /*
     * If the request required a reply that was already sent,
     * this optional reply-ack won't be sent as the
     * VHOST_USER_NEED_REPLY was cleared in send_vhost_reply().
     */
    if (msg.flags & VHOST_USER_NEED_REPLY) {
        msg.payload.u64 = ret == VH_RESULT_ERR;
        msg.size = sizeof(msg.payload.u64);
        msg.fd_num = 0;
        send_vhost_reply(fd, &msg);
    } else if (ret == VH_RESULT_ERR) {
        RTE_LOG(ERR, VHOST_CONFIG,
            "vhost message handling failed.\n");
        return -1;
    }

    if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) {
        dev->flags |= VIRTIO_DEV_READY;

        if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
            if (dev->dequeue_zero_copy) {
                RTE_LOG(INFO, VHOST_CONFIG,
                        "dequeue zero copy is enabled\n");
            }

            if (dev->notify_ops->new_device(dev->vid) == 0)
                dev->flags |= VIRTIO_DEV_RUNNING;
        }
    }

    did = dev->vdpa_dev_id;
    vdpa_dev = rte_vdpa_get_device(did);
    if (vdpa_dev && virtio_is_ready(dev) &&
            !(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED) &&
            msg.request.master == VHOST_USER_SET_VRING_ENABLE) {
        if (vdpa_dev->ops->dev_conf)
            vdpa_dev->ops->dev_conf(dev->vid);
        dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
        if (vhost_user_host_notifier_ctrl(dev->vid, true) != 0) {
            RTE_LOG(INFO, VHOST_CONFIG,
                "(%d) software relay is used for vDPA, performance may be low.\n",
                dev->vid);
        }
    }

    return 0;
}

 

In fact, QEMU itself contains a message-handling function of the same kind as DPDK's handler.
qemu-3.0.0/contrib/libvhost-user/libvhost-user.c 

1218 static bool
1219 vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
1220 {
(...)
1244     switch (vmsg->request) {
(...)
1265     case VHOST_USER_SET_VRING_ADDR:
1266         return vu_set_vring_addr_exec(dev, vmsg);
(...)

 

Clearly, QEMU must therefore have a function that sends the memory-address information to DPDK over the UNIX socket.
qemu-3.0.0/hw/virtio/vhost-user.c

 

 588 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
 589                                      struct vhost_vring_addr *addr)
 590 {   
 591     VhostUserMsg msg = {
 592         .hdr.request = VHOST_USER_SET_VRING_ADDR,
 593         .hdr.flags = VHOST_USER_VERSION,
 594         .payload.addr = *addr,
 595         .hdr.size = sizeof(msg.payload.addr),
 596     };
 597         
 598     if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 599         return -1;
 600     }
 601         
 602     return 0;
 603 }

 

 

 drivers/crypto/virtio/virtio_ring.h:28:#define VRING_AVAIL_F_NO_INTERRUPT 1

The most important data structure inside vring_virtqueue is the vring:

struct vring {                                                                
    unsigned int num;

    struct vring_desc *desc;

    struct vring_avail *avail;

    struct vring_used *used;
};

As you can see, vring in turn contains three key structures: vring_desc, vring_avail and vring_used (note: the front end and the back end access all three through shared memory). Let's look at vring_avail first:

struct vring_avail {                                                          
    __virtio16 flags;
    __virtio16 idx;
    __virtio16 ring[];
};

The flags field of vring_avail tells the backend whether it should raise an interrupt. The corresponding backend code, taking DPDK's vhost-user as an example, is:

        if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
                eventfd_write((int)vq->callfd, 1);

So when the guest leaves the avail flags at 0 (VRING_AVAIL_F_NO_INTERRUPT not set), the backend raises an interrupt to the guest by writing to vq->callfd.
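The flag itself is written by the guest driver: a polling front end can suppress call-fd interrupts by setting VRING_AVAIL_F_NO_INTERRUPT in avail->flags (this applies when VIRTIO_RING_F_EVENT_IDX is not negotiated). A minimal sketch of the guest side, not taken from any particular driver:

#include <stdint.h>

#define VRING_AVAIL_F_NO_INTERRUPT 1

struct vring_avail {
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
};

/* Guest side: ask the backend not to signal the call fd for used buffers. */
static void virtqueue_disable_cb(struct vring_avail *avail)
{
    avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}

/* Guest side: re-enable notifications before sleeping on the queue. */
static void virtqueue_enable_cb(struct vring_avail *avail)
{
    avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
    /* A real driver issues a memory barrier here and re-checks the used
     * ring, because the backend may still have read the old flags value. */
}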

 

 

DPDK sets vq->callfd

 
static int
vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg,
                        int main_fd __rte_unused)
{
        struct virtio_net *dev = *pdev;
        struct vhost_vring_file file;
        struct vhost_virtqueue *vq;
        int expected_fds;

        expected_fds = (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) ? 0 : 1;
        if (validate_msg_fds(msg, expected_fds) != 0)
                return RTE_VHOST_MSG_RESULT_ERR;

        file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
        if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
                file.fd = VIRTIO_INVALID_EVENTFD;
        else
                file.fd = msg->fds[0];
        RTE_LOG(INFO, VHOST_CONFIG,
                "vring call idx:%d file:%d\n", file.index, file.fd);

        vq = dev->virtqueue[file.index];
        if (vq->callfd >= 0)
                close(vq->callfd);

        vq->callfd = file.fd;

        return RTE_VHOST_MSG_RESULT_OK;
}

 

VHOST_CONFIG: reallocate vq from 0 to 3 node
VHOST_CONFIG: reallocate dev from 0 to 3 node
VHOST_CONFIG: read message VHOST_USER_SET_VRING_KICK
VHOST_CONFIG: vring kick idx:0 file:48
VHOST_CONFIG: read message VHOST_USER_SET_VRING_CALL
VHOST_CONFIG: vring call idx:0 file:50
VHOST_CONFIG: read message VHOST_USER_SET_VRING_NUM
VHOST_CONFIG: read message VHOST_USER_SET_VRING_BASE
VHOST_CONFIG: read message VHOST_USER_SET_VRING_ADDR
VHOST_CONFIG: reallocate vq from 0 to 3 node
VHOST_CONFIG: read message VHOST_USER_SET_VRING_KICK
VHOST_CONFIG: vring kick idx:1 file:45
VHOST_CONFIG: virtio is now ready for processing.
LSWITCH_CONFIG: Virtual device 0 is added to the device list
VHOST_CONFIG: read message VHOST_USER_SET_VRING_CALL
VHOST_CONFIG: vring call idx:1 file:51
VHOST_CONFIG: read message VHOST_USER_SET_VRING_ENABLE
VHOST_CONFIG: set queue enable: 1 to qp idx: 0
VHOST_CONFIG: read message VHOST_USER_SET_VRING_ENABLE
VHOST_CONFIG: set queue enable: 1 to qp idx: 1

 

 

 

 

 

static __rte_always_inline void
vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
        /* Flush used->idx update before we read avail->flags. */
        rte_smp_mb();

        /* Don't kick guest if we don't reach index specified by guest. */
        if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
                uint16_t old = vq->signalled_used;
                uint16_t new = vq->last_used_idx;
                bool signalled_used_valid = vq->signalled_used_valid;

                vq->signalled_used = new;
                vq->signalled_used_valid = true;

                VHOST_LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n",
                        __func__,
                        vhost_used_event(vq),
                        old, new);

                if ((vhost_need_event(vhost_used_event(vq), new, old) &&
                                        (vq->callfd >= 0)) ||
                                unlikely(!signalled_used_valid)) {
                        eventfd_write(vq->callfd, (eventfd_t) 1);
                        if (dev->notify_ops->guest_notified)
                                dev->notify_ops->guest_notified(dev->vid);
                }
        } else {
                /* Kick the guest if necessary. */
                if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
                                && (vq->callfd >= 0)) {
                        eventfd_write(vq->callfd, (eventfd_t)1);
                        if (dev->notify_ops->guest_notified)
                                dev->notify_ops->guest_notified(dev->vid);
                }
        }
}
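The vhost_need_event() check in the EVENT_IDX branch above is the standard virtio event-index comparison (the guest-side twin is vring_need_event() in linux/virtio_ring.h). It boils down to a wrap-safe 16-bit window test:

#include <stdint.h>

/* Returns non-zero if crossing event_idx between old and new_idx means the
 * other side asked to be notified. All arithmetic is mod 2^16, so the
 * comparison stays correct when the ring indices wrap around. */
static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx,
                                   uint16_t old)
{
    return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
}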

 

 

 

Breakpoint 1, virtio_dev_tx_split (dev=0x423fffba00, vq=0x423ffcf000, mbuf_pool=0x13f9aac00, pkts=0xffffbd40cea0, count=1) at /data1/dpdk-19.11/lib/librte_vhost/virtio_net.c:1795
1795                            vhost_vring_call_split(dev, vq);
Missing separate debuginfos, use: debuginfo-install libgcc-4.8.5-44.el7.aarch64
(gdb) bt
#0  virtio_dev_tx_split (dev=0x423fffba00, vq=0x423ffcf000, mbuf_pool=0x13f9aac00, pkts=0xffffbd40cea0, count=1) at /data1/dpdk-19.11/lib/librte_vhost/virtio_net.c:1795
#1  0x000000000052312c in rte_vhost_dequeue_burst (vid=0, queue_id=1, mbuf_pool=0x13f9aac00, pkts=0xffffbd40cea0, count=32) at /data1/dpdk-19.11/lib/librte_vhost/virtio_net.c:2255
#2  0x00000000004822b0 in learning_switch_main (arg=<optimized out>) at /data1/ovs/LearningSwitch-DPDK/main.c:503
#3  0x0000000000585fb4 in eal_thread_loop (arg=0x0) at /data1/dpdk-19.11/lib/librte_eal/linux/eal/eal_thread.c:153
#4  0x0000ffffbe617d38 in start_thread (arg=0xffffbd40d910) at pthread_create.c:309
#5  0x0000ffffbe55f5f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:91
(gdb) s
vhost_vring_call_split (vq=0x423ffcf000, dev=0x423fffba00) at /data1/dpdk-19.11/lib/librte_vhost/vhost.h:656
656             rte_smp_mb();
(gdb) list
651
652     static __rte_always_inline void
653     vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
654     {
655             /* Flush used->idx update before we read avail->flags. */
656             rte_smp_mb();
657
658             /* Don't kick guest if we don't reach index specified by guest. */
659             if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
660                     uint16_t old = vq->signalled_used;
(gdb) n
659             if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
(gdb) n
660                     uint16_t old = vq->signalled_used;
(gdb) n
661                     uint16_t new = vq->last_used_idx;
(gdb) n
662                     bool signalled_used_valid = vq->signalled_used_valid;
(gdb) n
664                     vq->signalled_used = new;
(gdb) n
665                     vq->signalled_used_valid = true;
(gdb) n
672                     if ((vhost_need_event(vhost_used_event(vq), new, old) &&
(gdb) n
674                                     unlikely(!signalled_used_valid)) {
(gdb) n
673                                             (vq->callfd >= 0)) ||
(gdb) n
675                             eventfd_write(vq->callfd, (eventfd_t) 1);
(gdb) n
676                             if (dev->notify_ops->guest_notified)
(gdb) n

 

 

QEMU side

In vhost_virtqueue_start, QEMU takes the rfd of the VirtQueue's host_notifier and hands that fd to the backend via VHOST_SET_VRING_KICK (the same eventfd is registered with kvm.ko as an ioeventfd), so kvm.ko will later signal this fd through eventfd_signal whenever the guest kicks the queue.

static int vhost_virtqueue_start(struct vhost_dev *dev,
                                struct VirtIODevice *vdev,
                                struct vhost_virtqueue *vq,
                                unsigned idx)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    hwaddr s, l, a;
    int r;
    int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
    struct vhost_vring_file file = {
        .index = vhost_vq_index
    };
    struct vhost_vring_state state = {
        .index = vhost_vq_index
    };
    struct VirtQueue *vvq = virtio_get_queue(vdev, idx);

    a = virtio_queue_get_desc_addr(vdev, idx);
    if (a == 0) {
        /* Queue might not be ready for start */
        return 0;
    }

    vq->num = state.num = virtio_queue_get_num(vdev, idx);
    r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
    if (r) {
        VHOST_OPS_DEBUG("vhost_set_vring_num failed");
        return -errno;
    }

    state.num = virtio_queue_get_last_avail_idx(vdev, idx);
    r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
    if (r) {
        VHOST_OPS_DEBUG("vhost_set_vring_base failed");
        return -errno;
    }

    if (vhost_needs_vring_endian(vdev)) {
        r = vhost_virtqueue_set_vring_endian_legacy(dev,
                                                    virtio_is_big_endian(vdev),
                                                    vhost_vq_index);
        if (r) {
            return -errno;
        }
    }

    vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
    vq->desc_phys = a;
    vq->desc = vhost_memory_map(dev, a, &l, false);
    if (!vq->desc || l != s) {
        r = -ENOMEM;
        goto fail_alloc_desc;
    }
    vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
    vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
    vq->avail = vhost_memory_map(dev, a, &l, false);
    if (!vq->avail || l != s) {
        r = -ENOMEM;
        goto fail_alloc_avail;
    }
    vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
    vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
    vq->used = vhost_memory_map(dev, a, &l, true);
    if (!vq->used || l != s) {
        r = -ENOMEM;
        goto fail_alloc_used;
    }

    r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
    if (r < 0) {
        r = -errno;
        goto fail_alloc;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); ---------------- pass the kick fd
    r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
    if (r) {
        VHOST_OPS_DEBUG("vhost_set_vring_kick failed");
        r = -errno;
        goto fail_kick;
    }

    /* Clear and discard previous events if any. */
    event_notifier_test_and_clear(&vq->masked_notifier);

    /* Init vring in unmasked state, unless guest_notifier_mask
     * will do it later.
     */
    if (!vdev->use_guest_notifier_mask) {
        /* TODO: check and handle errors. */
        vhost_virtqueue_mask(dev, vdev, idx, false);
    }

    if (k->query_guest_notifiers &&
        k->query_guest_notifiers(qbus->parent) &&
        virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
        file.fd = -1;  ---------------- note: -1 (no call fd)
        r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
        if (r) {
            goto fail_vector;
        }
    }

    return 0;

fail_vector:
fail_kick:
fail_alloc:
    vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
                       0, 0);
fail_alloc_used:
    vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
                       0, 0);
fail_alloc_avail:
    vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
                       0, 0);
fail_alloc_desc:
    return r;
}

 

 

The vhost_virtqueue_init function (called by vhost_dev_start) does the following (a sketch of the corresponding kernel-vhost ioctl sequence follows this list):
1. ioctl VHOST_SET_VRING_NUM sets the vring size
2. ioctl VHOST_SET_VRING_BASE sets the base index (VirtQueue.last_avail_idx)
3. sets the ring-related members of vhost_virtqueue (desc, avail, used_size, used_phys, used,
     ring_size, ring_phys, ring)
4. calls vhost_virtqueue_set_addr to program the ring addresses
5. ioctl VHOST_SET_VRING_KICK sets the kick fd (guest -> vhost) (VirtQueue.host_notifier.fd)
6. ioctl VHOST_SET_VRING_CALL sets the call fd (vhost -> guest) (VirtQueue.guest_notifier.fd)
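A minimal sketch of that sequence against kernel vhost (/dev/vhost-net); the eventfds and ring addresses are placeholders and error handling is omitted. For vhost-user the same requests travel as messages over the unix socket instead of ioctls:

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

/* Program one virtqueue of a kernel vhost backend. The vhost_vring_addr
 * passed in stands for the guest ring already mapped into this process. */
static int setup_vring(int vhost_fd, unsigned int index,
                       struct vhost_vring_addr *addr)
{
    struct vhost_vring_state state = { .index = index, .num = 256 };
    struct vhost_vring_file file = { .index = index };

    ioctl(vhost_fd, VHOST_SET_VRING_NUM, &state);      /* ring size          */

    state.num = 0;                                     /* last_avail_idx     */
    ioctl(vhost_fd, VHOST_SET_VRING_BASE, &state);

    addr->index = index;
    ioctl(vhost_fd, VHOST_SET_VRING_ADDR, addr);       /* desc/avail/used    */

    file.fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);  /* kick: guest->vhost */
    ioctl(vhost_fd, VHOST_SET_VRING_KICK, &file);

    file.fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);  /* call: vhost->guest */
    ioctl(vhost_fd, VHOST_SET_VRING_CALL, &file);

    return 0;
}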
 
Initialization of the guest_notifier:
1. At the start of vhost_dev_start, vdev->binding->set_guest_notifiers(vdev->binding_opaque, true) is called.
   For a virtio PCI device this is virtio_pci_set_guest_notifiers (virtio-pci.c).
2. virtio_pci_set_guest_notifiers calls virtio_pci_set_guest_notifier for every possible vq.
3. virtio_pci_set_guest_notifier first initializes an eventfd via event_notifier_init, then calls
   qemu_set_fd_handler to add that eventfd to QEMU's selectable fd list, with
   virtio_pci_guest_notifier_read as its read poll handler.
4. msix_set_mask_notifier is called, with msix_mask_notifier_func set to virtio_pci_mask_notifier.
5. For every MSI-X entry of the device (0 - msix_entries_nr), msix_set_mask_notifier_for_vector is called.
6. If the vector is not masked, dev->msix_mask_notifier (i.e. virtio_pci_mask_notifier) is called.
7. For all current VirtQueues, virtio_pci_mask_vq is called.
8. kvm_set_irqfd is called to set the VirtQueue's irqfd to the guest notifier fd.
9. If that succeeds, qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL) is called
   to stop selecting on the guest notifier fd.
 

vhost_net_start

int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                    int total_queues)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    struct vhost_net *net;
    int r, e, i;
    NetClientState *peer;

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return -ENOSYS;
    }

    for (i = 0; i < total_queues; i++) {

        peer = qemu_get_peer(ncs, i);
        net = get_vhost_net(peer);
        vhost_net_set_vq_index(net, i * 2);

        /* Suppress the masking guest notifiers on vhost user
         * because vhost user doesn't interrupt masking/unmasking
         * properly.
         */
        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
            dev->use_guest_notifier_mask = false;
        }
     }

    r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
    if (r < 0) {
        error_report("Error binding guest notifier: %d", -r);
        goto err;
    }

    for (i = 0; i < total_queues; i++) {
        peer = qemu_get_peer(ncs, i);
        r = vhost_net_start_one(get_vhost_net(peer), dev);

        if (r < 0) {
            goto err_start;
        }

        if (peer->vring_enable) {
            /* restore vring enable state */
            r = vhost_set_vring_enable(peer, peer->vring_enable);

            if (r < 0) {
                goto err_start;
            }
        }
    }

    return 0;
    /* ... error-handling paths (err_start / err) omitted in this excerpt ... */
}

 

 
void
vhost_user_backend_start(VhostUserBackend *b)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(b->vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int ret, i ;

    if (b->started) {
        return;
    }

    if (!k->set_guest_notifiers) {
        error_report("binding does not support guest notifiers");
        return;
    }

    ret = vhost_dev_enable_notifiers(&b->dev, b->vdev);
    if (ret < 0) {
        return;
    }


    //virtio_pci_set_guest_notifiers
    ret = k->set_guest_notifiers(qbus->parent, b->dev.nvqs, true); /* enable interrupts */
    if (ret < 0) {
        error_report("Error binding guest notifier");
        goto err_host_notifiers;
    }

    b->dev.acked_features = b->vdev->guest_features;
    ret = vhost_dev_start(&b->dev, b->vdev);
    if (ret < 0) {
        error_report("Error start vhost dev");
        goto err_guest_notifiers;
    }

    /* guest_notifier_mask/pending not used yet, so just unmask
     * everything here. virtio-pci will do the right thing by
     * enabling/disabling irqfd.
     */
    for (i = 0; i < b->dev.nvqs; i++) {
        vhost_virtqueue_mask(&b->dev, b->vdev, b->dev.vq_index + i, false);
    }

    b->started = true;
    return;

err_guest_notifiers:
    k->set_guest_notifiers(qbus->parent, b->dev.nvqs, false);
err_host_notifiers:
    vhost_dev_disable_notifiers(&b->dev, b->vdev);
}

 

 vhost_user_backend_dev_init
      b->dev.nvqs = nvqs;
 
 
 k->set_guest_notifiers(qbus->parent, b->dev.nvqs, true);  // enable guest notifiers (interrupts)
static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
                                         bool with_irqfd)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtQueue *vq = virtio_get_queue(vdev, n);
    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);

    if (assign) {
        int r = event_notifier_init(notifier, 0); // initialize the eventfd
        if (r < 0) {
            return r;
        }
        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
    } else {
        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
        event_notifier_cleanup(notifier);
    }

    if (!msix_enabled(&proxy->pci_dev) &&
        vdev->use_guest_notifier_mask &&
        vdc->guest_notifier_mask) {
        vdc->guest_notifier_mask(vdev, n, !assign);
    }

    return 0;
}


EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}
 
 
 
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

 

void event_notifier_init_fd(EventNotifier *e, int fd)
{
    e->rfd = fd;
    e->wfd = fd;
}

int event_notifier_init(EventNotifier *e, int active)
{
    int fds[2];
    int ret;

#ifdef CONFIG_EVENTFD
    ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
#else
    ret = -1;
    errno = ENOSYS;
#endif
    if (ret >= 0) {
        e->rfd = e->wfd = ret;
    } else {
        if (errno != ENOSYS) {
            return -errno;
        }
        if (qemu_pipe(fds) < 0) {
            return -errno;
        }
        ret = fcntl_setfl(fds[0], O_NONBLOCK);
        if (ret < 0) {
            ret = -errno;
            goto fail;
        }
        ret = fcntl_setfl(fds[1], O_NONBLOCK);
        if (ret < 0) {
            ret = -errno;
            goto fail;
        }
        e->rfd = fds[0];
        e->wfd = fds[1];
    }
    if (active) {
        event_notifier_set(e);
    }
    return 0;

fail:
    close(fds[0]);
    close(fds[1]);
    return ret;
}

 

If KVM_IRQFD is set up successfully, the kvm module injects the interrupt into the guest through the irqfd. QEMU configures the kvm module's irqfd, which consists of a write fd plus an optional read/resample fd, along the path virtio_pci_set_guest_notifiers -> kvm_virtio_pci_vector_use -> kvm_virtio_pci_irqfd_use -> kvm_irqchip_add_irqfd_notifier -> kvm_irqchip_assign_irqfd, which ultimately calls kvm_vm_ioctl.
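At the bottom of that chain the request reaches the kernel as a KVM_IRQFD ioctl on the VM fd. A hedged sketch of what assign/deassign look like at the raw KVM API level (vm_fd, gsi and the eventfd are placeholders):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Tell KVM to inject interrupt line `gsi` into the guest whenever
 * `call_fd` (the vhost call eventfd) is signalled. */
static int irqfd_assign(int vm_fd, int call_fd, unsigned int gsi)
{
    struct kvm_irqfd irqfd = {
        .fd  = call_fd,
        .gsi = gsi,
        /* .resamplefd + KVM_IRQFD_FLAG_RESAMPLE would add the optional
         * "read" side used for level-triggered interrupts. */
    };

    return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}

static int irqfd_deassign(int vm_fd, int call_fd, unsigned int gsi)
{
    struct kvm_irqfd irqfd = {
        .fd    = call_fd,
        .gsi   = gsi,
        .flags = KVM_IRQFD_FLAG_DEASSIGN,
    };

    return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}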

 
Use of the guest_notifier:
1. After vhost finishes processing a request and places the buffer on the used ring, it writes to the call fd.
2. If the irqfd was set up successfully, kvm interrupts the guest directly. If not, the following path is taken:
3. QEMU detects the event via its select loop (vhost's call fd is the guest_notifier of the corresponding
   vq inside QEMU, which has already been added to the selectable fd list).
4. virtio_pci_guest_notifier_read is called to notify the guest.
5. The guest fetches the data from the used ring.
 
If no irqfd is set, the guest notifier fd wakes the QEMU process waiting on it, which enters the registered handler virtio_queue_guest_notifier_read, calls virtio_irq, and finally ends up in virtio_pci_notify (a minimal sketch of this wait-and-handle loop follows).
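A minimal, QEMU-independent sketch of that fallback: the process poll()s the guest-notifier eventfd, drains it, and then performs the "notify the guest" action. The handler name here is a placeholder standing in for virtio_queue_guest_notifier_read:

#include <poll.h>
#include <stdint.h>
#include <unistd.h>

/* Placeholder for virtio_queue_guest_notifier_read(): raise the interrupt
 * towards the guest by whatever means the device model provides. */
static void guest_notifier_read(int fd)
{
    uint64_t cnt;

    /* Drain ("test and clear") the eventfd counter before acting on it. */
    if (read(fd, &cnt, sizeof(cnt)) == sizeof(cnt)) {
        /* inject the virtio interrupt here */
    }
}

static void wait_on_call_fd(int call_fd)
{
    struct pollfd pfd = { .fd = call_fd, .events = POLLIN };

    for (;;) {
        if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
            guest_notifier_read(call_fd);
    }
}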

 

Guest receive-interrupt flow
 
os_host_main_loop_wait
-> qemu_poll_ns returns
-> qemu_iohandler_poll walks the iohandlers list and handles the pending events
-> for every ready iohandler:
-> ioh->fd_read(ioh->opaque); // the registered fd_read handler, i.e. virtio_queue_guest_notifier_read
 
-> virtio_queue_guest_notifier_read
-> virtio_irq
-> virtio_notify_vector
-> k->notify(qbus->parent, vector); // i.e. virtio_pci_notify

 

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

static void virtio_pci_notify(DeviceState *d, uint16_t vector)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);

    if (msix_enabled(&proxy->pci_dev))
        msix_notify(&proxy->pci_dev, vector);
    else {
        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
        pci_set_irq(&proxy->pci_dev, vdev->isr & 1);
    }
}
 
Initialization of the host_notifier:
1. In vhost_virtqueue_init,
   vdev->binding->set_host_notifiers(vdev->binding_opaque, idx, true) is called.
   For a virtio PCI device this is virtio_pci_set_host_notifier (virtio-pci.c).
2. virtio_pci_set_host_notifier calls virtio_pci_set_host_notifier_internal.
3. virtio_pci_set_host_notifier_internal first initializes an eventfd via event_notifier_init, then calls
   kvm_set_ioeventfd_pio_word.
4. kvm_set_ioeventfd_pio_word sets up the kick fd by calling kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick).
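That kvm_vm_ioctl is a KVM_IOEVENTFD request. A hedged sketch of the raw form for a 2-byte PIO doorbell (vm_fd, the port address and the eventfd are placeholders; for legacy virtio-pci the matched value is the queue index written to the QUEUE_NOTIFY register):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Ask KVM to signal `kick_fd` whenever the guest writes `val` (2 bytes)
 * to PIO address `addr`, instead of exiting to userspace. */
static int ioeventfd_assign_pio(int vm_fd, int kick_fd,
                                uint64_t addr, uint16_t val)
{
    struct kvm_ioeventfd kick = {
        .datamatch = val,
        .addr      = addr,
        .len       = 2,
        .fd        = kick_fd,
        .flags     = KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH,
    };

    return ioctl(vm_fd, KVM_IOEVENTFD, &kick);
}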

virtio pmd

 

1. Using eventfd in QEMU

Eventfd usage in QEMU is built on three functions: event_notifier_init, event_notifier_get_fd and event_notifier_set_handler.
event_notifier_init initializes the EventNotifier (its implementation is shown earlier in this post):
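Underneath those wrappers is the plain eventfd(2) object that vq->callfd ultimately refers to. A tiny self-contained sketch of the write/read semantics:

#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
    /* Same kind of fd that ends up in vq->callfd / the EventNotifier. */
    int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (efd < 0)
        return 1;

    /* "Kick"/"call" side: add 1 to the counter, waking any poller. */
    eventfd_write(efd, 1);

    /* Handler side: the read returns the accumulated counter and resets it,
     * which is what event_notifier_test_and_clear() relies on. */
    eventfd_t cnt = 0;
    eventfd_read(efd, &cnt);
    printf("counter was %llu\n", (unsigned long long)cnt);

    close(efd);
    return 0;
}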

