From: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
To: Yuval Shaia <yuval.shaia@oracle.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 01/13] hw/rdma: Make distinction between device init and start modes
Date: Tue, 24 Jul 2018 22:53:15 +0300 [thread overview]
Message-ID: <313a3e4b-9026-76fb-f8c7-edc412de2eff@gmail.com> (raw)
In-Reply-To: <20180724192941.GA3927@lap1>
On 07/24/2018 10:29 PM, Yuval Shaia wrote:
> On Tue, Jul 24, 2018 at 03:08:10PM +0300, Marcel Apfelbaum wrote:
>> Hi Yuval,
>>
>> On 07/16/2018 10:40 AM, Yuval Shaia wrote:
>>> There are certain operations that are well considered as part of device
>>> configuration while others are needed only when "start" command is
>>> triggered by the guest driver. An example of device initialization step
>>> is msix_init and example of "device start" stage is the creation of a CQ
>>> completion handler thread.
>>>
>>> Driver expects such distinction - implement it.
>>>
>>> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
>>> ---
>>> hw/rdma/rdma_backend.c | 96 +++++++++++++++++++++------
>>> hw/rdma/rdma_backend.h | 2 +
>>> hw/rdma/rdma_backend_defs.h | 3 +-
>>> hw/rdma/vmw/pvrdma_main.c | 129 +++++++++++++++++++++---------------
>>> 4 files changed, 155 insertions(+), 75 deletions(-)
>>>
>>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>>> index e9ced6f9ef..647e16399f 100644
>>> --- a/hw/rdma/rdma_backend.c
>>> +++ b/hw/rdma/rdma_backend.c
>>> @@ -35,6 +35,7 @@
>>> #define VENDOR_ERR_MR_SMALL 0x208
>>> #define THR_NAME_LEN 16
>>> +#define THR_POLL_TO 5000
>>> typedef struct BackendCtx {
>>> uint64_t req_id;
>>> @@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg)
>>> int rc;
>>> struct ibv_cq *ev_cq;
>>> void *ev_ctx;
>>> + int flags;
>>> + GPollFD pfds[1];
>>> +
>>> + /* Change to non-blocking mode */
>>> + flags = fcntl(backend_dev->channel->fd, F_GETFL);
>>> + rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK);
>>> + if (rc < 0) {
>>> + pr_dbg("Fail to change to non-blocking mode\n");
>>> + return NULL;
>>> + }
>>> pr_dbg("Starting\n");
>>> + pfds[0].fd = backend_dev->channel->fd;
>>> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
>>> +
>>> + backend_dev->comp_thread.is_running = true;
>>> +
>>> while (backend_dev->comp_thread.run) {
>>> - pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
>>> - rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
>>> - pr_dbg("ibv_get_cq_event=%d\n", rc);
>>> - if (unlikely(rc)) {
>>> - pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
>>> - continue;
>>> - }
>>> + do {
>>> + rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS);
>>> + } while (!rc && backend_dev->comp_thread.run);
>>> +
>>> + if (backend_dev->comp_thread.run) {
>>> + pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
>>> + rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
>>> + pr_dbg("ibv_get_cq_event=%d\n", rc);
>>> + if (unlikely(rc)) {
>>> + pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
>>> + continue;
>>> + }
>>> - rc = ibv_req_notify_cq(ev_cq, 0);
>>> - if (unlikely(rc)) {
>>> - pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
>>> - }
>>> + rc = ibv_req_notify_cq(ev_cq, 0);
>>> + if (unlikely(rc)) {
>>> + pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
>>> + }
>>> - poll_cq(backend_dev->rdma_dev_res, ev_cq);
>>> + poll_cq(backend_dev->rdma_dev_res, ev_cq);
>>> - ibv_ack_cq_events(ev_cq, 1);
>>> + ibv_ack_cq_events(ev_cq, 1);
>>> + }
>>> }
>>> pr_dbg("Going down\n");
>>> /* TODO: Post cqe for all remaining buffs that were posted */
>>> + backend_dev->comp_thread.is_running = false;
>>> +
>>> + qemu_thread_exit(0);
>>> +
>>> return NULL;
>>> }
>>> +static void stop_comp_thread(RdmaBackendDev *backend_dev)
>>> +{
>>> + backend_dev->comp_thread.run = false;
>>> + while (backend_dev->comp_thread.is_running) {
>>> + pr_dbg("Waiting for thread to complete\n");
>>> + sleep(THR_POLL_TO / SCALE_US / 2);
>>> + }
>>> +}
>>> +
>>> +static void start_comp_thread(RdmaBackendDev *backend_dev)
>>> +{
>>> + char thread_name[THR_NAME_LEN] = {0};
>>> +
>>> + stop_comp_thread(backend_dev);
>>> +
>>> + snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
>>> + ibv_get_device_name(backend_dev->ib_dev));
>>> + backend_dev->comp_thread.run = true;
>>> + qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
>>> + comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
>>> +}
>>> +
>>> void rdma_backend_register_comp_handler(void (*handler)(int status,
>>> unsigned int vendor_err, void *ctx))
>>> {
>>> @@ -706,7 +754,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>>> int i;
>>> int ret = 0;
>>> int num_ibv_devices;
>>> - char thread_name[THR_NAME_LEN] = {0};
>>> struct ibv_device **dev_list;
>>> struct ibv_port_attr port_attr;
>>> @@ -800,11 +847,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>>> pr_dbg("interface_id=0x%" PRIx64 "\n",
>>> be64_to_cpu(backend_dev->gid.global.interface_id));
>>> - snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
>>> - ibv_get_device_name(backend_dev->ib_dev));
>>> - backend_dev->comp_thread.run = true;
>>> - qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
>>> - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
>>> + backend_dev->comp_thread.run = false;
>>> + backend_dev->comp_thread.is_running = false;
>>> ah_cache_init();
>>> @@ -823,8 +867,22 @@ out:
>>> return ret;
>>> }
>>> +
>>> +void rdma_backend_start(RdmaBackendDev *backend_dev)
>>> +{
>>> + pr_dbg("Starting rdma_backend\n");
>>> + start_comp_thread(backend_dev);
>>> +}
>>> +
>>> +void rdma_backend_stop(RdmaBackendDev *backend_dev)
>>> +{
>>> + pr_dbg("Stopping rdma_backend\n");
>>> + stop_comp_thread(backend_dev);
>>> +}
>>> +
>>> void rdma_backend_fini(RdmaBackendDev *backend_dev)
>>> {
>>> + rdma_backend_stop(backend_dev);
>>> g_hash_table_destroy(ah_hash);
>>> ibv_destroy_comp_channel(backend_dev->channel);
>>> ibv_close_device(backend_dev->context);
>>> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
>>> index 3cd636dd88..3049a73962 100644
>>> --- a/hw/rdma/rdma_backend.h
>>> +++ b/hw/rdma/rdma_backend.h
>>> @@ -52,6 +52,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
>>> uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
>>> Error **errp);
>>> void rdma_backend_fini(RdmaBackendDev *backend_dev);
>>> +void rdma_backend_start(RdmaBackendDev *backend_dev);
>>> +void rdma_backend_stop(RdmaBackendDev *backend_dev);
>>> void rdma_backend_register_comp_handler(void (*handler)(int status,
>>> unsigned int vendor_err, void *ctx));
>>> void rdma_backend_unregister_comp_handler(void);
>>> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
>>> index ff5cfc26eb..7404f64002 100644
>>> --- a/hw/rdma/rdma_backend_defs.h
>>> +++ b/hw/rdma/rdma_backend_defs.h
>>> @@ -24,7 +24,8 @@ typedef struct RdmaDeviceResources RdmaDeviceResources;
>>> typedef struct RdmaBackendThread {
>>> QemuThread thread;
>>> QemuMutex mutex;
>>> - bool run;
>>> + bool run; /* Set by thread manager to let thread know it should exit */
>>> + bool is_running; /* Set by the thread to report its status */
>>> } RdmaBackendThread;
>>> typedef struct RdmaBackendDev {
>>> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
>>> index 3ed7409763..6a5073974d 100644
>>> --- a/hw/rdma/vmw/pvrdma_main.c
>>> +++ b/hw/rdma/vmw/pvrdma_main.c
>>> @@ -286,8 +286,78 @@ static void init_ports(PVRDMADev *dev, Error **errp)
>>> }
>>> }
>>> +static void uninit_msix(PCIDevice *pdev, int used_vectors)
>>> +{
>>> + PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> + int i;
>>> +
>>> + for (i = 0; i < used_vectors; i++) {
>>> + msix_vector_unuse(pdev, i);
>>> + }
>>> +
>>> + msix_uninit(pdev, &dev->msix, &dev->msix);
>>> +}
>>> +
>>> +static int init_msix(PCIDevice *pdev, Error **errp)
>>> +{
>>> + PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> + int i;
>>> + int rc;
>>> +
>>> + rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> + RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> + RDMA_MSIX_PBA, 0, NULL);
>>> +
>>> + if (rc < 0) {
>>> + error_setg(errp, "Failed to initialize MSI-X");
>>> + return rc;
>>> + }
>>> +
>>> + for (i = 0; i < RDMA_MAX_INTRS; i++) {
>>> + rc = msix_vector_use(PCI_DEVICE(dev), i);
>>> + if (rc < 0) {
>>> + error_setg(errp, "Fail mark MSI-X vector %d", i);
>>> + uninit_msix(pdev, i);
>>> + return rc;
>>> + }
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>> The above functions were only moved around the same file. Right?
> Correct.
>
>> I suggest keeping them as they were for easier review.
> I think that this is what i did, the rest is the work of diff :)
>
> Anyways, functions should be gathered as groups and now looks like they are
> so better have one-time-"hard"-review and have file looks organized.
>
>>> +static void pvrdma_fini(PCIDevice *pdev)
>>> +{
>>> + PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> +
>>> + pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
>>> + PCI_FUNC(pdev->devfn));
>>> +
>>> + pvrdma_qp_ops_fini();
>>> +
>>> + rdma_rm_fini(&dev->rdma_dev_res);
>>> +
>>> + rdma_backend_fini(&dev->backend_dev);
>>> +
>>> + free_dsr(dev);
>>> +
>>> + if (msix_enabled(pdev)) {
>>> + uninit_msix(pdev, RDMA_MAX_INTRS);
>>> + }
>>> +}
>>> +
>>> +static void pvrdma_stop(PVRDMADev *dev)
>>> +{
>>> + rdma_backend_stop(&dev->backend_dev);
>>> +}
>>> +
>>> +static void pvrdma_start(PVRDMADev *dev)
>>> +{
>>> + rdma_backend_start(&dev->backend_dev);
>>> +}
>>> +
>> You might not need the above functions for now.
> Yeah, agree but how about leave it just as place holder for others that
> will probably join and also from modularity perspectives better that caller
> will not be aware of what exactly needs to be start/stop.
Sure.
Thanks,
Marcel
>
> Thought?
>>> static void activate_device(PVRDMADev *dev)
>>> {
>>> + pvrdma_start(dev);
>>> set_reg_val(dev, PVRDMA_REG_ERR, 0);
>>> pr_dbg("Device activated\n");
>>> }
>>> @@ -300,7 +370,10 @@ static int unquiesce_device(PVRDMADev *dev)
>>> static int reset_device(PVRDMADev *dev)
>>> {
>>> + pvrdma_stop(dev);
>>> +
>>> pr_dbg("Device reset complete\n");
>>> +
>>> return 0;
>>> }
>>> @@ -469,45 +542,6 @@ static void init_regs(PCIDevice *pdev)
>>> set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF);
>>> }
>>> -static void uninit_msix(PCIDevice *pdev, int used_vectors)
>>> -{
>>> - PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> - int i;
>>> -
>>> - for (i = 0; i < used_vectors; i++) {
>>> - msix_vector_unuse(pdev, i);
>>> - }
>>> -
>>> - msix_uninit(pdev, &dev->msix, &dev->msix);
>>> -}
>>> -
>>> -static int init_msix(PCIDevice *pdev, Error **errp)
>>> -{
>>> - PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> - int i;
>>> - int rc;
>>> -
>>> - rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> - RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
>>> - RDMA_MSIX_PBA, 0, NULL);
>>> -
>>> - if (rc < 0) {
>>> - error_setg(errp, "Failed to initialize MSI-X");
>>> - return rc;
>>> - }
>>> -
>>> - for (i = 0; i < RDMA_MAX_INTRS; i++) {
>>> - rc = msix_vector_use(PCI_DEVICE(dev), i);
>>> - if (rc < 0) {
>>> - error_setg(errp, "Fail mark MSI-X vercor %d", i);
>>> - uninit_msix(pdev, i);
>>> - return rc;
>>> - }
>>> - }
>>> -
>>> - return 0;
>>> -}
>>> -
>>> static void init_dev_caps(PVRDMADev *dev)
>>> {
>>> size_t pg_tbl_bytes = TARGET_PAGE_SIZE *
>>> @@ -602,22 +636,7 @@ out:
>>> static void pvrdma_exit(PCIDevice *pdev)
>>> {
>>> - PVRDMADev *dev = PVRDMA_DEV(pdev);
>>> -
>>> - pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
>>> - PCI_FUNC(pdev->devfn));
>>> -
>>> - pvrdma_qp_ops_fini();
>>> -
>>> - rdma_rm_fini(&dev->rdma_dev_res);
>>> -
>>> - rdma_backend_fini(&dev->backend_dev);
>>> -
>>> - free_dsr(dev);
>>> -
>>> - if (msix_enabled(pdev)) {
>>> - uninit_msix(pdev, RDMA_MAX_INTRS);
>>> - }
>>> + pvrdma_fini(pdev);
>>> }
>>> static void pvrdma_class_init(ObjectClass *klass, void *data)
>>
>>
>> Reviewed-by: Marcel Apfelbaum<marcel.apfelbaum@gmail.com>
>>
>> Thanks,
>> Marcel
next prev parent reply other threads:[~2018-07-24 19:53 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-07-16 7:40 [Qemu-devel] [PATCH 00/13] Misc fixes for pvrdma device Yuval Shaia
2018-07-16 7:40 ` [Qemu-devel] [PATCH 01/13] hw/rdma: Make distinction between device init and start modes Yuval Shaia
2018-07-24 12:08 ` Marcel Apfelbaum
2018-07-24 19:29 ` Yuval Shaia
2018-07-24 19:53 ` Marcel Apfelbaum [this message]
2018-07-16 7:40 ` [Qemu-devel] [PATCH 02/13] hw/pvrdma: Bugfix - provide the correct attr_mask to query_qp Yuval Shaia
2018-07-16 10:38 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 03/13] hw/rdma: Modify debug macros Yuval Shaia
2018-07-24 12:10 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 04/13] hw/pvrdma: Clean CQE before use Yuval Shaia
2018-07-16 10:41 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 05/13] hw/pvrdma: Make default pkey 0xFFFF Yuval Shaia
2018-07-16 10:42 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 06/13] hw/rdma: Get rid of unneeded structure Yuval Shaia
2018-07-16 10:44 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 07/13] hw/rdma: Do not allocate memory for non-dma MR Yuval Shaia
2018-07-24 12:19 ` Marcel Apfelbaum
2018-08-05 14:56 ` Yuval Shaia
2018-07-16 7:40 ` [Qemu-devel] [PATCH 08/13] hw/rdma: Reorder resource cleanup Yuval Shaia
2018-07-16 10:44 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 09/13] hw/pvrdma: Cosmetic change - indent right Yuval Shaia
2018-07-16 10:45 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 10/13] hw/rdma: Cosmetic change - move to generic function Yuval Shaia
2018-07-16 10:46 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 11/13] hw/rdma: Print backend QP number in hex format Yuval Shaia
2018-07-16 10:46 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 12/13] hw/rdma: Bugfix: Support non-aligned buffers Yuval Shaia
2018-07-24 12:22 ` Marcel Apfelbaum
2018-07-16 7:40 ` [Qemu-devel] [PATCH 13/13] hw/rdma: Save pci dev in backend_dev Yuval Shaia
2018-07-16 10:49 ` Marcel Apfelbaum
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=313a3e4b-9026-76fb-f8c7-edc412de2eff@gmail.com \
--to=marcel.apfelbaum@gmail.com \
--cc=qemu-devel@nongnu.org \
--cc=yuval.shaia@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).