qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
To: Yuval Shaia <yuval.shaia@oracle.com>, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 01/13] hw/rdma: Make distinction between device init and start modes
Date: Tue, 24 Jul 2018 15:08:10 +0300	[thread overview]
Message-ID: <0fef2395-433c-80ee-b5b7-6b78cb543cc8@gmail.com> (raw)
In-Reply-To: <20180716074038.3364-2-yuval.shaia@oracle.com>

Hi Yuval,

On 07/16/2018 10:40 AM, Yuval Shaia wrote:
> There are certain operations that are well considered as part of device
> configuration while others are needed only when "start" command is
> triggered by the guest driver. An example of device initialization step
> is msix_init and example of "device start" stage is the creation of a CQ
> completion handler thread.
>
> Driver expects such distinction - implement it.
>
> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
> ---
>   hw/rdma/rdma_backend.c      |  96 +++++++++++++++++++++------
>   hw/rdma/rdma_backend.h      |   2 +
>   hw/rdma/rdma_backend_defs.h |   3 +-
>   hw/rdma/vmw/pvrdma_main.c   | 129 +++++++++++++++++++++---------------
>   4 files changed, 155 insertions(+), 75 deletions(-)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index e9ced6f9ef..647e16399f 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -35,6 +35,7 @@
>   #define VENDOR_ERR_MR_SMALL         0x208
>   
>   #define THR_NAME_LEN 16
> +#define THR_POLL_TO  5000
>   
>   typedef struct BackendCtx {
>       uint64_t req_id;
> @@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg)
>       int rc;
>       struct ibv_cq *ev_cq;
>       void *ev_ctx;
> +    int flags;
> +    GPollFD pfds[1];
> +
> +    /* Change to non-blocking mode */
> +    flags = fcntl(backend_dev->channel->fd, F_GETFL);
> +    rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK);
> +    if (rc < 0) {
> +        pr_dbg("Fail to change to non-blocking mode\n");
> +        return NULL;
> +    }
>   
>       pr_dbg("Starting\n");
>   
> +    pfds[0].fd = backend_dev->channel->fd;
> +    pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
> +
> +    backend_dev->comp_thread.is_running = true;
> +
>       while (backend_dev->comp_thread.run) {
> -        pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
> -        rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
> -        pr_dbg("ibv_get_cq_event=%d\n", rc);
> -        if (unlikely(rc)) {
> -            pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
> -            continue;
> -        }
> +        do {
> +            rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS);
> +        } while (!rc && backend_dev->comp_thread.run);
> +
> +        if (backend_dev->comp_thread.run) {
> +            pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
> +            rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
> +            pr_dbg("ibv_get_cq_event=%d\n", rc);
> +            if (unlikely(rc)) {
> +                pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
> +                continue;
> +            }
>   
> -        rc = ibv_req_notify_cq(ev_cq, 0);
> -        if (unlikely(rc)) {
> -            pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
> -        }
> +            rc = ibv_req_notify_cq(ev_cq, 0);
> +            if (unlikely(rc)) {
> +                pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
> +            }
>   
> -        poll_cq(backend_dev->rdma_dev_res, ev_cq);
> +            poll_cq(backend_dev->rdma_dev_res, ev_cq);
>   
> -        ibv_ack_cq_events(ev_cq, 1);
> +            ibv_ack_cq_events(ev_cq, 1);
> +        }
>       }
>   
>       pr_dbg("Going down\n");
>   
>       /* TODO: Post cqe for all remaining buffs that were posted */
>   
> +    backend_dev->comp_thread.is_running = false;
> +
> +    qemu_thread_exit(0);
> +
>       return NULL;
>   }
>   
> +static void stop_comp_thread(RdmaBackendDev *backend_dev)
> +{
> +    backend_dev->comp_thread.run = false;
> +    while (backend_dev->comp_thread.is_running) {
> +        pr_dbg("Waiting for thread to complete\n");
> +        sleep(THR_POLL_TO / SCALE_US / 2);
> +    }
> +}
> +
> +static void start_comp_thread(RdmaBackendDev *backend_dev)
> +{
> +    char thread_name[THR_NAME_LEN] = {0};
> +
> +    stop_comp_thread(backend_dev);
> +
> +    snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
> +             ibv_get_device_name(backend_dev->ib_dev));
> +    backend_dev->comp_thread.run = true;
> +    qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
> +                       comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
> +}
> +
>   void rdma_backend_register_comp_handler(void (*handler)(int status,
>                                           unsigned int vendor_err, void *ctx))
>   {
> @@ -706,7 +754,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>       int i;
>       int ret = 0;
>       int num_ibv_devices;
> -    char thread_name[THR_NAME_LEN] = {0};
>       struct ibv_device **dev_list;
>       struct ibv_port_attr port_attr;
>   
> @@ -800,11 +847,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>       pr_dbg("interface_id=0x%" PRIx64 "\n",
>              be64_to_cpu(backend_dev->gid.global.interface_id));
>   
> -    snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
> -             ibv_get_device_name(backend_dev->ib_dev));
> -    backend_dev->comp_thread.run = true;
> -    qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
> -                       comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
> +    backend_dev->comp_thread.run = false;
> +    backend_dev->comp_thread.is_running = false;
>   
>       ah_cache_init();
>   
> @@ -823,8 +867,22 @@ out:
>       return ret;
>   }
>   
> +
> +void rdma_backend_start(RdmaBackendDev *backend_dev)
> +{
> +    pr_dbg("Starting rdma_backend\n");
> +    start_comp_thread(backend_dev);
> +}
> +
> +void rdma_backend_stop(RdmaBackendDev *backend_dev)
> +{
> +    pr_dbg("Stopping rdma_backend\n");
> +    stop_comp_thread(backend_dev);
> +}
> +
>   void rdma_backend_fini(RdmaBackendDev *backend_dev)
>   {
> +    rdma_backend_stop(backend_dev);
>       g_hash_table_destroy(ah_hash);
>       ibv_destroy_comp_channel(backend_dev->channel);
>       ibv_close_device(backend_dev->context);
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index 3cd636dd88..3049a73962 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -52,6 +52,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>                         uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
>                         Error **errp);
>   void rdma_backend_fini(RdmaBackendDev *backend_dev);
> +void rdma_backend_start(RdmaBackendDev *backend_dev);
> +void rdma_backend_stop(RdmaBackendDev *backend_dev);
>   void rdma_backend_register_comp_handler(void (*handler)(int status,
>                                           unsigned int vendor_err, void *ctx));
>   void rdma_backend_unregister_comp_handler(void);
> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
> index ff5cfc26eb..7404f64002 100644
> --- a/hw/rdma/rdma_backend_defs.h
> +++ b/hw/rdma/rdma_backend_defs.h
> @@ -24,7 +24,8 @@ typedef struct RdmaDeviceResources RdmaDeviceResources;
>   typedef struct RdmaBackendThread {
>       QemuThread thread;
>       QemuMutex mutex;
> -    bool run;
> +    bool run; /* Set by thread manager to let thread know it should exit */
> +    bool is_running; /* Set by the thread to report its status */
>   } RdmaBackendThread;
>   
>   typedef struct RdmaBackendDev {
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index 3ed7409763..6a5073974d 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -286,8 +286,78 @@ static void init_ports(PVRDMADev *dev, Error **errp)
>       }
>   }
>   
> +static void uninit_msix(PCIDevice *pdev, int used_vectors)
> +{
> +    PVRDMADev *dev = PVRDMA_DEV(pdev);
> +    int i;
> +
> +    for (i = 0; i < used_vectors; i++) {
> +        msix_vector_unuse(pdev, i);
> +    }
> +
> +    msix_uninit(pdev, &dev->msix, &dev->msix);
> +}
> +
> +static int init_msix(PCIDevice *pdev, Error **errp)
> +{
> +    PVRDMADev *dev = PVRDMA_DEV(pdev);
> +    int i;
> +    int rc;
> +
> +    rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
> +                   RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
> +                   RDMA_MSIX_PBA, 0, NULL);
> +
> +    if (rc < 0) {
> +        error_setg(errp, "Failed to initialize MSI-X");
> +        return rc;
> +    }
> +
> +    for (i = 0; i < RDMA_MAX_INTRS; i++) {
> +        rc = msix_vector_use(PCI_DEVICE(dev), i);
> +        if (rc < 0) {
> +            error_setg(errp, "Fail mark MSI-X vector %d", i);
> +            uninit_msix(pdev, i);
> +            return rc;
> +        }
> +    }
> +
> +    return 0;
> +}
> +

The above functions were only moved around the same file. Right?
I suggest keeping them as they were for easier review.

> +static void pvrdma_fini(PCIDevice *pdev)
> +{
> +    PVRDMADev *dev = PVRDMA_DEV(pdev);
> +
> +    pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
> +           PCI_FUNC(pdev->devfn));
> +
> +    pvrdma_qp_ops_fini();
> +
> +    rdma_rm_fini(&dev->rdma_dev_res);
> +
> +    rdma_backend_fini(&dev->backend_dev);
> +
> +    free_dsr(dev);
> +
> +    if (msix_enabled(pdev)) {
> +        uninit_msix(pdev, RDMA_MAX_INTRS);
> +    }
> +}
> +
> +static void pvrdma_stop(PVRDMADev *dev)
> +{
> +    rdma_backend_stop(&dev->backend_dev);
> +}
> +
> +static void pvrdma_start(PVRDMADev *dev)
> +{
> +    rdma_backend_start(&dev->backend_dev);
> +}
> +

You might not need the above functions for now.

>   static void activate_device(PVRDMADev *dev)
>   {
> +    pvrdma_start(dev);
>       set_reg_val(dev, PVRDMA_REG_ERR, 0);
>       pr_dbg("Device activated\n");
>   }
> @@ -300,7 +370,10 @@ static int unquiesce_device(PVRDMADev *dev)
>   
>   static int reset_device(PVRDMADev *dev)
>   {
> +    pvrdma_stop(dev);
> +
>       pr_dbg("Device reset complete\n");
> +
>       return 0;
>   }
>   
> @@ -469,45 +542,6 @@ static void init_regs(PCIDevice *pdev)
>       set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF);
>   }
>   
> -static void uninit_msix(PCIDevice *pdev, int used_vectors)
> -{
> -    PVRDMADev *dev = PVRDMA_DEV(pdev);
> -    int i;
> -
> -    for (i = 0; i < used_vectors; i++) {
> -        msix_vector_unuse(pdev, i);
> -    }
> -
> -    msix_uninit(pdev, &dev->msix, &dev->msix);
> -}
> -
> -static int init_msix(PCIDevice *pdev, Error **errp)
> -{
> -    PVRDMADev *dev = PVRDMA_DEV(pdev);
> -    int i;
> -    int rc;
> -
> -    rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
> -                   RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
> -                   RDMA_MSIX_PBA, 0, NULL);
> -
> -    if (rc < 0) {
> -        error_setg(errp, "Failed to initialize MSI-X");
> -        return rc;
> -    }
> -
> -    for (i = 0; i < RDMA_MAX_INTRS; i++) {
> -        rc = msix_vector_use(PCI_DEVICE(dev), i);
> -        if (rc < 0) {
> -            error_setg(errp, "Fail mark MSI-X vercor %d", i);
> -            uninit_msix(pdev, i);
> -            return rc;
> -        }
> -    }
> -
> -    return 0;
> -}
> -
>   static void init_dev_caps(PVRDMADev *dev)
>   {
>       size_t pg_tbl_bytes = TARGET_PAGE_SIZE *
> @@ -602,22 +636,7 @@ out:
>   
>   static void pvrdma_exit(PCIDevice *pdev)
>   {
> -    PVRDMADev *dev = PVRDMA_DEV(pdev);
> -
> -    pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
> -           PCI_FUNC(pdev->devfn));
> -
> -    pvrdma_qp_ops_fini();
> -
> -    rdma_rm_fini(&dev->rdma_dev_res);
> -
> -    rdma_backend_fini(&dev->backend_dev);
> -
> -    free_dsr(dev);
> -
> -    if (msix_enabled(pdev)) {
> -        uninit_msix(pdev, RDMA_MAX_INTRS);
> -    }
> +    pvrdma_fini(pdev);
>   }
>   
>   static void pvrdma_class_init(ObjectClass *klass, void *data)



Reviewed-by: Marcel Apfelbaum<marcel.apfelbaum@gmail.com>

Thanks,
Marcel

  reply	other threads:[~2018-07-24 12:08 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-16  7:40 [Qemu-devel] [PATCH 00/13] Misc fixes for pvrdma device Yuval Shaia
2018-07-16  7:40 ` [Qemu-devel] [PATCH 01/13] hw/rdma: Make distinction between device init and start modes Yuval Shaia
2018-07-24 12:08   ` Marcel Apfelbaum [this message]
2018-07-24 19:29     ` Yuval Shaia
2018-07-24 19:53       ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 02/13] hw/pvrdma: Bugfix - provide the correct attr_mask to query_qp Yuval Shaia
2018-07-16 10:38   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 03/13] hw/rdma: Modify debug macros Yuval Shaia
2018-07-24 12:10   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 04/13] hw/pvrdma: Clean CQE before use Yuval Shaia
2018-07-16 10:41   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 05/13] hw/pvrdma: Make default pkey 0xFFFF Yuval Shaia
2018-07-16 10:42   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 06/13] hw/rdma: Get rid of unneeded structure Yuval Shaia
2018-07-16 10:44   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 07/13] hw/rdma: Do not allocate memory for non-dma MR Yuval Shaia
2018-07-24 12:19   ` Marcel Apfelbaum
2018-08-05 14:56     ` Yuval Shaia
2018-07-16  7:40 ` [Qemu-devel] [PATCH 08/13] hw/rdma: Reorder resource cleanup Yuval Shaia
2018-07-16 10:44   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 09/13] hw/pvrdma: Cosmetic change - indent right Yuval Shaia
2018-07-16 10:45   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 10/13] hw/rdma: Cosmetic change - move to generic function Yuval Shaia
2018-07-16 10:46   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 11/13] hw/rdma: Print backend QP number in hex format Yuval Shaia
2018-07-16 10:46   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 12/13] hw/rdma: Bugfix: Support non-aligned buffers Yuval Shaia
2018-07-24 12:22   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 13/13] hw/rdma: Save pci dev in backend_dev Yuval Shaia
2018-07-16 10:49   ` Marcel Apfelbaum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0fef2395-433c-80ee-b5b7-6b78cb543cc8@gmail.com \
    --to=marcel.apfelbaum@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yuval.shaia@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).