From: Leon Romanovsky <leon@kernel.org>
To: Abhijit Gangurde <abhijit.gangurde@amd.com>
Cc: shannon.nelson@amd.com, brett.creeley@amd.com,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, corbet@lwn.net, jgg@ziepe.ca,
andrew+netdev@lunn.ch, allen.hubbe@amd.com,
nikhil.agarwal@amd.com, linux-rdma@vger.kernel.org,
netdev@vger.kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, Andrew Boyer <andrew.boyer@amd.com>
Subject: Re: [PATCH v3 09/14] RDMA/ionic: Create device queues to support admin operations
Date: Tue, 1 Jul 2025 13:24:09 +0300 [thread overview]
Message-ID: <20250701102409.GA118736@unreal> (raw)
In-Reply-To: <20250624121315.739049-10-abhijit.gangurde@amd.com>
On Tue, Jun 24, 2025 at 05:43:10PM +0530, Abhijit Gangurde wrote:
> Setup RDMA admin queues using device command exposed over
> auxiliary device and manage these queues using ida.
>
> Co-developed-by: Andrew Boyer <andrew.boyer@amd.com>
> Signed-off-by: Andrew Boyer <andrew.boyer@amd.com>
> Co-developed-by: Allen Hubbe <allen.hubbe@amd.com>
> Signed-off-by: Allen Hubbe <allen.hubbe@amd.com>
> Signed-off-by: Abhijit Gangurde <abhijit.gangurde@amd.com>
> ---
> v2->v3
> - Fixed lockdep warning
> - Used IDA for resource id allocation
> - Removed rw locks around xarrays
>
> drivers/infiniband/hw/ionic/ionic_admin.c | 1169 +++++++++++++++++
> .../infiniband/hw/ionic/ionic_controlpath.c | 184 +++
> drivers/infiniband/hw/ionic/ionic_fw.h | 164 +++
> drivers/infiniband/hw/ionic/ionic_ibdev.c | 56 +
> drivers/infiniband/hw/ionic/ionic_ibdev.h | 225 ++++
> drivers/infiniband/hw/ionic/ionic_pgtbl.c | 113 ++
> drivers/infiniband/hw/ionic/ionic_queue.c | 52 +
> drivers/infiniband/hw/ionic/ionic_queue.h | 234 ++++
> drivers/infiniband/hw/ionic/ionic_res.h | 154 +++
> 9 files changed, 2351 insertions(+)
> create mode 100644 drivers/infiniband/hw/ionic/ionic_admin.c
> create mode 100644 drivers/infiniband/hw/ionic/ionic_controlpath.c
> create mode 100644 drivers/infiniband/hw/ionic/ionic_fw.h
> create mode 100644 drivers/infiniband/hw/ionic/ionic_pgtbl.c
> create mode 100644 drivers/infiniband/hw/ionic/ionic_queue.c
> create mode 100644 drivers/infiniband/hw/ionic/ionic_queue.h
> create mode 100644 drivers/infiniband/hw/ionic/ionic_res.h
<...>
> +static void ionic_admin_timedout(struct ionic_aq *aq)
> +{
> + struct ionic_cq *cq = &aq->vcq->cq[0];
> + struct ionic_ibdev *dev = aq->dev;
> + unsigned long irqflags;
> + u16 pos;
> +
> + spin_lock_irqsave(&aq->lock, irqflags);
> + if (ionic_queue_empty(&aq->q))
> + goto out;
> +
> + /* Reset ALL adminq if any one times out */
> + if (aq->admin_state < IONIC_ADMIN_KILLED)
> + queue_work(ionic_evt_workq, &dev->reset_work);
> +
> + ibdev_err(&dev->ibdev, "admin command timed out, aq %d\n", aq->aqid);
> +
> + ibdev_warn(&dev->ibdev, "admin timeout was set for %ums\n",
> + (u32)jiffies_to_msecs(IONIC_ADMIN_TIMEOUT));
> + ibdev_warn(&dev->ibdev, "admin inactivity for %ums\n",
> + (u32)jiffies_to_msecs(jiffies - aq->stamp));
> +
> + ibdev_warn(&dev->ibdev, "admin commands outstanding %u\n",
> + ionic_queue_length(&aq->q));
> + ibdev_warn(&dev->ibdev, "%s more commands pending\n",
> + list_empty(&aq->wr_post) ? "no" : "some");
> +
> + pos = cq->q.prod;
> +
> + ibdev_warn(&dev->ibdev, "admin cq pos %u (next to complete)\n", pos);
> + print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
> + ionic_queue_at(&cq->q, pos),
> + BIT(cq->q.stride_log2), true);
> +
> + pos = (pos - 1) & cq->q.mask;
> +
> + ibdev_warn(&dev->ibdev, "admin cq pos %u (last completed)\n", pos);
> + print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
> + ionic_queue_at(&cq->q, pos),
> + BIT(cq->q.stride_log2), true);
> +
> + pos = aq->q.cons;
> +
> + ibdev_warn(&dev->ibdev, "admin pos %u (next to complete)\n", pos);
> + print_hex_dump(KERN_WARNING, "cmd ", DUMP_PREFIX_OFFSET, 16, 1,
> + ionic_queue_at(&aq->q, pos),
> + BIT(aq->q.stride_log2), true);
> +
> + pos = (aq->q.prod - 1) & aq->q.mask;
> + if (pos == aq->q.cons)
> + goto out;
> +
> + ibdev_warn(&dev->ibdev, "admin pos %u (last posted)\n", pos);
> + print_hex_dump(KERN_WARNING, "cmd ", DUMP_PREFIX_OFFSET, 16, 1,
> + ionic_queue_at(&aq->q, pos),
> + BIT(aq->q.stride_log2), true);
> +
> +out:
> + spin_unlock_irqrestore(&aq->lock, irqflags);
> +}
Please reduce number of debug prints. You are supposed to send driver
that works and not for the debug session.
> +
> +static void ionic_admin_reset_dwork(struct ionic_ibdev *dev)
> +{
> + if (atomic_read(&dev->admin_state) >= IONIC_ADMIN_KILLED)
> + return;
<...>
> + if (aq->admin_state >= IONIC_ADMIN_KILLED)
> + return;
<...>
> + ibdev_dbg(&dev->ibdev, "poll admin cq %u prod %u\n",
> + cq->cqid, cq->q.prod);
> + print_hex_dump_debug("cqe ", DUMP_PREFIX_OFFSET, 16, 1,
> + qcqe, BIT(cq->q.stride_log2), true);
We have restrack to print CQE and other objects, please use it.
> + *cqe = qcqe;
> +
> + return true;
> +}
> +
> +static void ionic_admin_poll_locked(struct ionic_aq *aq)
> +{
> + struct ionic_cq *cq = &aq->vcq->cq[0];
> + struct ionic_admin_wr *wr, *wr_next;
> + struct ionic_ibdev *dev = aq->dev;
> + u32 wr_strides, avlbl_strides;
> + struct ionic_v1_cqe *cqe;
> + u32 qtf, qid;
> + u16 old_prod;
> + u8 type;
> +
> + lockdep_assert_held(&aq->lock);
> +
> + if (aq->admin_state >= IONIC_ADMIN_KILLED) {
IONIC_ADMIN_KILLED is the last, there is no ">" option.
> + list_for_each_entry_safe(wr, wr_next, &aq->wr_prod, aq_ent) {
> + INIT_LIST_HEAD(&wr->aq_ent);
> + aq->q_wr[wr->status].wr = NULL;
> + wr->status = aq->admin_state;
> + complete_all(&wr->work);
> + }
> + INIT_LIST_HEAD(&aq->wr_prod);
<...>
> + if (do_reset)
> + /* Reset device on a timeout */
> + ionic_admin_timedout(bad_aq);
I wonder why RDMA driver resets device and not the one who owns PCI.
> + else if (do_reschedule)
> + /* Try to poll again later */
> + ionic_admin_reset_dwork(dev);
> +}
<...>
> + vcq = kzalloc(sizeof(*vcq), GFP_KERNEL);
> + if (!vcq) {
> + rc = -ENOMEM;
> + goto err_alloc;
> + }
> +
> + vcq->ibcq.device = &dev->ibdev;
> + vcq->ibcq.uobject = NULL;
1. There is no need in explicit NULL here, vcq was allocated with kzalloc()
2. Maybe rdma_zalloc_drv_obj() should be used here.
> + vcq->ibcq.comp_handler = ionic_rdma_admincq_comp;
> + vcq->ibcq.event_handler = ionic_rdma_admincq_event;
> + vcq->ibcq.cq_context = NULL;
> + atomic_set(&vcq->ibcq.usecnt, 0);
<...>
> + aq->admin_state = IONIC_ADMIN_KILLED;
<...>
> + old_state = atomic_cmpxchg(&dev->admin_state, IONIC_ADMIN_ACTIVE,
> + IONIC_ADMIN_PAUSED);
> + if (old_state != IONIC_ADMIN_ACTIVE)
In all these places you are mixing enum_admin_state and atomic_t for
same values, but different variable. Please chose or atomic_t or enum.
> + return;
> +
> + /* Pause all the AQs */
> + local_irq_save(irqflags);
> + for (i = 0; i < dev->lif_cfg.aq_count; i++) {
> + struct ionic_aq *aq = dev->aq_vec[i];
> +
> + spin_lock(&aq->lock);
> + /* pause rdma admin queues to reset device */
> + if (aq->admin_state == IONIC_ADMIN_ACTIVE)
> + aq->admin_state = IONIC_ADMIN_PAUSED;
> + spin_unlock(&aq->lock);
> + }
> + local_irq_restore(irqflags);
> +
> + rc = ionic_rdma_reset_devcmd(dev);
> + if (unlikely(rc)) {
> + ibdev_err(&dev->ibdev, "failed to reset rdma %d\n", rc);
> + ionic_request_rdma_reset(dev->lif_cfg.lif);
> + }
> +
> + ionic_kill_ibdev(dev, fatal_path);
> +}
<...>
> +static void ionic_cq_event(struct ionic_ibdev *dev, u32 cqid, u8 code)
> +{
> + struct ib_event ibev;
> + struct ionic_cq *cq;
> +
> + rcu_read_lock();
> + cq = xa_load(&dev->cq_tbl, cqid);
> + if (cq)
> + kref_get(&cq->cq_kref);
> + rcu_read_unlock();
What and how does this RCU protect?
> +
> + if (!cq) {
Is it possible?
> + ibdev_dbg(&dev->ibdev,
> + "missing cqid %#x code %u\n", cqid, code);
> + return;
> + }
<...>
> module_init(ionic_mod_init);
> diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h
> index e13adff390d7..e7563c0429fc 100644
> --- a/drivers/infiniband/hw/ionic/ionic_ibdev.h
> +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h
> @@ -4,18 +4,243 @@
> #ifndef _IONIC_IBDEV_H_
> #define _IONIC_IBDEV_H_
>
> +#include <rdma/ib_umem.h>
> #include <rdma/ib_verbs.h>
> +
> #include <ionic_api.h>
> +#include <ionic_regs.h>
> +
> +#include "ionic_fw.h"
> +#include "ionic_queue.h"
> +#include "ionic_res.h"
>
> #include "ionic_lif_cfg.h"
>
> +#define DRIVER_NAME "ionic_rdma"
It is KBUILD_MODNAME, please use it.
> +#define DRIVER_SHORTNAME "ionr"
> +
> #define IONIC_MIN_RDMA_VERSION 0
> #define IONIC_MAX_RDMA_VERSION 2
Nothing from the above is applicable to upstream code.
Thanks
next prev parent reply other threads:[~2025-07-01 10:24 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-24 12:13 [PATCH v3 00/14] Introduce AMD Pensando RDMA driver Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 01/14] net: ionic: Create an auxiliary device for rdma driver Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 02/14] net: ionic: Update LIF identity with additional RDMA capabilities Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 03/14] net: ionic: Export the APIs from net driver to support device commands Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 04/14] net: ionic: Provide RDMA reset support for the RDMA driver Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 05/14] net: ionic: Provide interrupt allocation " Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 06/14] net: ionic: Provide doorbell and CMB region information Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 07/14] RDMA: Add IONIC to rdma_driver_id definition Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 08/14] RDMA/ionic: Register auxiliary module for ionic ethernet adapter Abhijit Gangurde
2025-06-26 7:26 ` Leon Romanovsky
2025-06-27 10:18 ` Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 09/14] RDMA/ionic: Create device queues to support admin operations Abhijit Gangurde
2025-07-01 10:24 ` Leon Romanovsky [this message]
2025-07-03 6:59 ` Abhijit Gangurde
2025-07-03 8:41 ` Leon Romanovsky
2025-07-04 10:45 ` Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 10/14] RDMA/ionic: Register device ops for control path Abhijit Gangurde
2025-07-01 10:38 ` Leon Romanovsky
2025-07-02 13:18 ` Jason Gunthorpe
2025-07-02 18:00 ` Leon Romanovsky
2025-07-03 7:19 ` Abhijit Gangurde
2025-07-04 17:08 ` Leon Romanovsky
2025-07-07 5:27 ` Abhijit Gangurde
2025-07-07 7:21 ` Leon Romanovsky
2025-07-07 14:56 ` Abhijit Gangurde
2025-07-07 16:46 ` Leon Romanovsky
2025-07-08 10:05 ` Abhijit Gangurde
2025-07-13 6:27 ` Leon Romanovsky
2025-07-15 19:16 ` Jason Gunthorpe
2025-07-20 8:39 ` Abhijit Gangurde
2025-07-03 7:00 ` Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 11/14] RDMA/ionic: Register device ops for datapath Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 12/14] RDMA/ionic: Register device ops for miscellaneous functionality Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 13/14] RDMA/ionic: Implement device stats ops Abhijit Gangurde
2025-06-24 12:13 ` [PATCH v3 14/14] RDMA/ionic: Add Makefile/Kconfig to kernel build environment Abhijit Gangurde
2025-06-25 21:44 ` [PATCH v3 00/14] Introduce AMD Pensando RDMA driver Jakub Kicinski
2025-06-26 7:07 ` Leon Romanovsky
2025-06-27 10:06 ` Abhijit Gangurde
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250701102409.GA118736@unreal \
--to=leon@kernel.org \
--cc=abhijit.gangurde@amd.com \
--cc=allen.hubbe@amd.com \
--cc=andrew+netdev@lunn.ch \
--cc=andrew.boyer@amd.com \
--cc=brett.creeley@amd.com \
--cc=corbet@lwn.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=jgg@ziepe.ca \
--cc=kuba@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=nikhil.agarwal@amd.com \
--cc=pabeni@redhat.com \
--cc=shannon.nelson@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.