qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
To: Yuval Shaia <yuval.shaia@oracle.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 01/13] hw/rdma: Make distinction between device init and start modes
Date: Tue, 24 Jul 2018 22:53:15 +0300	[thread overview]
Message-ID: <313a3e4b-9026-76fb-f8c7-edc412de2eff@gmail.com> (raw)
In-Reply-To: <20180724192941.GA3927@lap1>



On 07/24/2018 10:29 PM, Yuval Shaia wrote:
> On Tue, Jul 24, 2018 at 03:08:10PM +0300, Marcel Apfelbaum wrote:
>> Hi Yuval,
>>
>> On 07/16/2018 10:40 AM, Yuval Shaia wrote:
>>> There are certain operations that are well considered as part of device
>>> configuration while others are needed only when "start" command is
>>> triggered by the guest driver. An example of device initialization step
>>> is msix_init and example of "device start" stage is the creation of a CQ
>>> completion handler thread.
>>>
>>> Driver expects such distinction - implement it.
>>>
>>> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
>>> ---
>>>    hw/rdma/rdma_backend.c      |  96 +++++++++++++++++++++------
>>>    hw/rdma/rdma_backend.h      |   2 +
>>>    hw/rdma/rdma_backend_defs.h |   3 +-
>>>    hw/rdma/vmw/pvrdma_main.c   | 129 +++++++++++++++++++++---------------
>>>    4 files changed, 155 insertions(+), 75 deletions(-)
>>>
>>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>>> index e9ced6f9ef..647e16399f 100644
>>> --- a/hw/rdma/rdma_backend.c
>>> +++ b/hw/rdma/rdma_backend.c
>>> @@ -35,6 +35,7 @@
>>>    #define VENDOR_ERR_MR_SMALL         0x208
>>>    #define THR_NAME_LEN 16
>>> +#define THR_POLL_TO  5000
>>>    typedef struct BackendCtx {
>>>        uint64_t req_id;
>>> @@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg)
>>>        int rc;
>>>        struct ibv_cq *ev_cq;
>>>        void *ev_ctx;
>>> +    int flags;
>>> +    GPollFD pfds[1];
>>> +
>>> +    /* Change to non-blocking mode */
>>> +    flags = fcntl(backend_dev->channel->fd, F_GETFL);
>>> +    rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK);
>>> +    if (rc < 0) {
>>> +        pr_dbg("Fail to change to non-blocking mode\n");
>>> +        return NULL;
>>> +    }
>>>        pr_dbg("Starting\n");
>>> +    pfds[0].fd = backend_dev->channel->fd;
>>> +    pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
>>> +
>>> +    backend_dev->comp_thread.is_running = true;
>>> +
>>>        while (backend_dev->comp_thread.run) {
>>> -        pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
>>> -        rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
>>> -        pr_dbg("ibv_get_cq_event=%d\n", rc);
>>> -        if (unlikely(rc)) {
>>> -            pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
>>> -            continue;
>>> -        }
>>> +        do {
>>> +            rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS);
>>> +        } while (!rc && backend_dev->comp_thread.run);
>>> +
>>> +        if (backend_dev->comp_thread.run) {
>>> +            pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
>>> +            rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
>>> +            pr_dbg("ibv_get_cq_event=%d\n", rc);
>>> +            if (unlikely(rc)) {
>>> +                pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
>>> +                continue;
>>> +            }
>>> -        rc = ibv_req_notify_cq(ev_cq, 0);
>>> -        if (unlikely(rc)) {
>>> -            pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
>>> -        }
>>> +            rc = ibv_req_notify_cq(ev_cq, 0);
>>> +            if (unlikely(rc)) {
>>> +                pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
>>> +            }
>>> -        poll_cq(backend_dev->rdma_dev_res, ev_cq);
>>> +            poll_cq(backend_dev->rdma_dev_res, ev_cq);
>>> -        ibv_ack_cq_events(ev_cq, 1);
>>> +            ibv_ack_cq_events(ev_cq, 1);
>>> +        }
>>>        }
>>>        pr_dbg("Going down\n");
>>>        /* TODO: Post cqe for all remaining buffs that were posted */
>>> +    backend_dev->comp_thread.is_running = false;
>>> +
>>> +    qemu_thread_exit(0);
>>> +
>>>        return NULL;
>>>    }
>>> +static void stop_comp_thread(RdmaBackendDev *backend_dev)
>>> +{
>>> +    backend_dev->comp_thread.run = false;
>>> +    while (backend_dev->comp_thread.is_running) {
>>> +        pr_dbg("Waiting for thread to complete\n");
>>> +        sleep(THR_POLL_TO / SCALE_US / 2);
>>> +    }
>>> +}
>>> +
>>> +static void start_comp_thread(RdmaBackendDev *backend_dev)
>>> +{
>>> +    char thread_name[THR_NAME_LEN] = {0};
>>> +
>>> +    stop_comp_thread(backend_dev);
>>> +
>>> +    snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
>>> +             ibv_get_device_name(backend_dev->ib_dev));
>>> +    backend_dev->comp_thread.run = true;
>>> +    qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
>>> +                       comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
>>> +}
>>> +
>>>    void rdma_backend_register_comp_handler(void (*handler)(int status,
>>>                                            unsigned int vendor_err, void *ctx))
>>>    {
>>> @@ -706,7 +754,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>>>        int i;
>>>        int ret = 0;
>>>        int num_ibv_devices;
>>> -    char thread_name[THR_NAME_LEN] = {0};
>>>        struct ibv_device **dev_list;
>>>        struct ibv_port_attr port_attr;
>>> @@ -800,11 +847,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>>>        pr_dbg("interface_id=0x%" PRIx64 "\n",
>>>               be64_to_cpu(backend_dev->gid.global.interface_id));
>>> -    snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
>>> -             ibv_get_device_name(backend_dev->ib_dev));
>>> -    backend_dev->comp_thread.run = true;
>>> -    qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
>>> -                       comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
>>> +    backend_dev->comp_thread.run = false;
>>> +    backend_dev->comp_thread.is_running = false;
>>>        ah_cache_init();
>>> @@ -823,8 +867,22 @@ out:
>>>        return ret;
>>>    }
>>> +
>>> +void rdma_backend_start(RdmaBackendDev *backend_dev)
>>> +{
>>> +    pr_dbg("Starting rdma_backend\n");
>>> +    start_comp_thread(backend_dev);
>>> +}
>>> +
>>> +void rdma_backend_stop(RdmaBackendDev *backend_dev)
>>> +{
>>> +    pr_dbg("Stopping rdma_backend\n");
>>> +    stop_comp_thread(backend_dev);
>>> +}
>>> +
>>>    void rdma_backend_fini(RdmaBackendDev *backend_dev)
>>>    {
>>> +    rdma_backend_stop(backend_dev);
>>>        g_hash_table_destroy(ah_hash);
>>>        ibv_destroy_comp_channel(backend_dev->channel);
>>>        ibv_close_device(backend_dev->context);
>>> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
>>> index 3cd636dd88..3049a73962 100644
>>> --- a/hw/rdma/rdma_backend.h
>>> +++ b/hw/rdma/rdma_backend.h
>>> @@ -52,6 +52,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>>>                          uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
>>>                          Error **errp);
>>>    void rdma_backend_fini(RdmaBackendDev *backend_dev);
>>> +void rdma_backend_start(RdmaBackendDev *backend_dev);
>>> +void rdma_backend_stop(RdmaBackendDev *backend_dev);
>>>    void rdma_backend_register_comp_handler(void (*handler)(int status,
>>>                                            unsigned int vendor_err, void *ctx));
>>>    void rdma_backend_unregister_comp_handler(void);
>>> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
>>> index ff5cfc26eb..7404f64002 100644
>>> --- a/hw/rdma/rdma_backend_defs.h
>>> +++ b/hw/rdma/rdma_backend_defs.h
>>> @@ -24,7 +24,8 @@ typedef struct RdmaDeviceResources RdmaDeviceResources;
>>>    typedef struct RdmaBackendThread {
>>>        QemuThread thread;
>>>        QemuMutex mutex;
>>> -    bool run;
>>> +    bool run; /* Set by thread manager to let thread know it should exit */
>>> +    bool is_running; /* Set by the thread to report its status */
>>>    } RdmaBackendThread;
>>>    typedef struct RdmaBackendDev {
>>> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
>>> index 3ed7409763..6a5073974d 100644
>>> --- a/hw/rdma/vmw/pvrdma_main.c
>>> +++ b/hw/rdma/vmw/pvrdma_main.c
>>> @@ -286,8 +286,78 @@ static void init_ports(PVRDMADev *dev, Error **errp)
>>>        }
>>>    }
>>> +static void uninit_msix(PCIDevice *pdev, int used_vectors)
>>> +{
>>> +    PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> +    int i;
>>> +
>>> +    for (i = 0; i < used_vectors; i++) {
>>> +        msix_vector_unuse(pdev, i);
>>> +    }
>>> +
>>> +    msix_uninit(pdev, &dev->msix, &dev->msix);
>>> +}
>>> +
>>> +static int init_msix(PCIDevice *pdev, Error **errp)
>>> +{
>>> +    PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> +    int i;
>>> +    int rc;
>>> +
>>> +    rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> +                   RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> +                   RDMA_MSIX_PBA, 0, NULL);
>>> +
>>> +    if (rc < 0) {
>>> +        error_setg(errp, "Failed to initialize MSI-X");
>>> +        return rc;
>>> +    }
>>> +
>>> +    for (i = 0; i < RDMA_MAX_INTRS; i++) {
>>> +        rc = msix_vector_use(PCI_DEVICE(dev), i);
>>> +        if (rc < 0) {
>>> +            error_setg(errp, "Fail mark MSI-X vector %d", i);
>>> +            uninit_msix(pdev, i);
>>> +            return rc;
>>> +        }
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>> The above functions were only moved around the same file. Right?
> Correct.
>
>> I suggest keeping them as they were for easier review.
> I think that this is what i did, the rest is the work of diff :)
>
> Anyways, functions should be gathered as groups and now looks like they are
> so better have one-time-"hard"-review and have file looks organized.
>
>>> +static void pvrdma_fini(PCIDevice *pdev)
>>> +{
>>> +    PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> +
>>> +    pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
>>> +           PCI_FUNC(pdev->devfn));
>>> +
>>> +    pvrdma_qp_ops_fini();
>>> +
>>> +    rdma_rm_fini(&dev->rdma_dev_res);
>>> +
>>> +    rdma_backend_fini(&dev->backend_dev);
>>> +
>>> +    free_dsr(dev);
>>> +
>>> +    if (msix_enabled(pdev)) {
>>> +        uninit_msix(pdev, RDMA_MAX_INTRS);
>>> +    }
>>> +}
>>> +
>>> +static void pvrdma_stop(PVRDMADev *dev)
>>> +{
>>> +    rdma_backend_stop(&dev->backend_dev);
>>> +}
>>> +
>>> +static void pvrdma_start(PVRDMADev *dev)
>>> +{
>>> +    rdma_backend_start(&dev->backend_dev);
>>> +}
>>> +
>> You might not need the above functions for now.
> Yeah, agree but how about leave it just as place holder for others that
> will probably join and also from modularity perspectives better that caller
> will not be aware of what exactly needs to be start/stop.

Sure.

Thanks,
Marcel

>
> Thought?
>>>    static void activate_device(PVRDMADev *dev)
>>>    {
>>> +    pvrdma_start(dev);
>>>        set_reg_val(dev, PVRDMA_REG_ERR, 0);
>>>        pr_dbg("Device activated\n");
>>>    }
>>> @@ -300,7 +370,10 @@ static int unquiesce_device(PVRDMADev *dev)
>>>    static int reset_device(PVRDMADev *dev)
>>>    {
>>> +    pvrdma_stop(dev);
>>> +
>>>        pr_dbg("Device reset complete\n");
>>> +
>>>        return 0;
>>>    }
>>> @@ -469,45 +542,6 @@ static void init_regs(PCIDevice *pdev)
>>>        set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF);
>>>    }
>>> -static void uninit_msix(PCIDevice *pdev, int used_vectors)
>>> -{
>>> -    PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> -    int i;
>>> -
>>> -    for (i = 0; i < used_vectors; i++) {
>>> -        msix_vector_unuse(pdev, i);
>>> -    }
>>> -
>>> -    msix_uninit(pdev, &dev->msix, &dev->msix);
>>> -}
>>> -
>>> -static int init_msix(PCIDevice *pdev, Error **errp)
>>> -{
>>> -    PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> -    int i;
>>> -    int rc;
>>> -
>>> -    rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> -                   RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> -                   RDMA_MSIX_PBA, 0, NULL);
>>> -
>>> -    if (rc < 0) {
>>> -        error_setg(errp, "Failed to initialize MSI-X");
>>> -        return rc;
>>> -    }
>>> -
>>> -    for (i = 0; i < RDMA_MAX_INTRS; i++) {
>>> -        rc = msix_vector_use(PCI_DEVICE(dev), i);
>>> -        if (rc < 0) {
>>> -            error_setg(errp, "Fail mark MSI-X vercor %d", i);
>>> -            uninit_msix(pdev, i);
>>> -            return rc;
>>> -        }
>>> -    }
>>> -
>>> -    return 0;
>>> -}
>>> -
>>>    static void init_dev_caps(PVRDMADev *dev)
>>>    {
>>>        size_t pg_tbl_bytes = TARGET_PAGE_SIZE *
>>> @@ -602,22 +636,7 @@ out:
>>>    static void pvrdma_exit(PCIDevice *pdev)
>>>    {
>>> -    PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> -
>>> -    pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
>>> -           PCI_FUNC(pdev->devfn));
>>> -
>>> -    pvrdma_qp_ops_fini();
>>> -
>>> -    rdma_rm_fini(&dev->rdma_dev_res);
>>> -
>>> -    rdma_backend_fini(&dev->backend_dev);
>>> -
>>> -    free_dsr(dev);
>>> -
>>> -    if (msix_enabled(pdev)) {
>>> -        uninit_msix(pdev, RDMA_MAX_INTRS);
>>> -    }
>>> +    pvrdma_fini(pdev);
>>>    }
>>>    static void pvrdma_class_init(ObjectClass *klass, void *data)
>>
>>
>> Reviewed-by: Marcel Apfelbaum<marcel.apfelbaum@gmail.com>
>>
>> Thanks,
>> Marcel

  reply	other threads:[~2018-07-24 19:53 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-16  7:40 [Qemu-devel] [PATCH 00/13] Misc fixes for pvrdma device Yuval Shaia
2018-07-16  7:40 ` [Qemu-devel] [PATCH 01/13] hw/rdma: Make distinction between device init and start modes Yuval Shaia
2018-07-24 12:08   ` Marcel Apfelbaum
2018-07-24 19:29     ` Yuval Shaia
2018-07-24 19:53       ` Marcel Apfelbaum [this message]
2018-07-16  7:40 ` [Qemu-devel] [PATCH 02/13] hw/pvrdma: Bugfix - provide the correct attr_mask to query_qp Yuval Shaia
2018-07-16 10:38   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 03/13] hw/rdma: Modify debug macros Yuval Shaia
2018-07-24 12:10   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 04/13] hw/pvrdma: Clean CQE before use Yuval Shaia
2018-07-16 10:41   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 05/13] hw/pvrdma: Make default pkey 0xFFFF Yuval Shaia
2018-07-16 10:42   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 06/13] hw/rdma: Get rid of unneeded structure Yuval Shaia
2018-07-16 10:44   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 07/13] hw/rdma: Do not allocate memory for non-dma MR Yuval Shaia
2018-07-24 12:19   ` Marcel Apfelbaum
2018-08-05 14:56     ` Yuval Shaia
2018-07-16  7:40 ` [Qemu-devel] [PATCH 08/13] hw/rdma: Reorder resource cleanup Yuval Shaia
2018-07-16 10:44   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 09/13] hw/pvrdma: Cosmetic change - indent right Yuval Shaia
2018-07-16 10:45   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 10/13] hw/rdma: Cosmetic change - move to generic function Yuval Shaia
2018-07-16 10:46   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 11/13] hw/rdma: Print backend QP number in hex format Yuval Shaia
2018-07-16 10:46   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 12/13] hw/rdma: Bugfix: Support non-aligned buffers Yuval Shaia
2018-07-24 12:22   ` Marcel Apfelbaum
2018-07-16  7:40 ` [Qemu-devel] [PATCH 13/13] hw/rdma: Save pci dev in backend_dev Yuval Shaia
2018-07-16 10:49   ` Marcel Apfelbaum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=313a3e4b-9026-76fb-f8c7-edc412de2eff@gmail.com \
    --to=marcel.apfelbaum@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yuval.shaia@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).