* [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
@ 2026-02-11 13:10 Tom Sela
2026-02-11 13:13 ` Jason Gunthorpe
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Tom Sela @ 2026-02-11 13:10 UTC (permalink / raw)
To: mrgolin, tomsela, jgg, leon, linux-rdma
Cc: sleybo, matua, gal.pressman, Yonatan Nachum
Add tracking of unique Address Handle usage to provide visibility into
active AH resource consumption. The implementation uses an xarray to
deduplicate identical AH requests that receive the same handle, ensuring
accurate resource counting.
The counter will be exposed via sysfs device attribute.
Reviewed-by: Michael Margolin <mrgolin@amazon.com>
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Tom Sela <tomsela@amazon.com>
---
drivers/infiniband/hw/efa/Makefile | 4 +-
drivers/infiniband/hw/efa/efa.h | 5 ++-
drivers/infiniband/hw/efa/efa_main.c | 13 ++++++-
drivers/infiniband/hw/efa/efa_sysfs.c | 33 ++++++++++++++++
drivers/infiniband/hw/efa/efa_sysfs.h | 15 +++++++
drivers/infiniband/hw/efa/efa_verbs.c | 56 ++++++++++++++++++++++++++-
6 files changed, 120 insertions(+), 6 deletions(-)
create mode 100644 drivers/infiniband/hw/efa/efa_sysfs.c
create mode 100644 drivers/infiniband/hw/efa/efa_sysfs.h
diff --git a/drivers/infiniband/hw/efa/Makefile b/drivers/infiniband/hw/efa/Makefile
index 6e83083af0bc..0ba04eab17a1 100644
--- a/drivers/infiniband/hw/efa/Makefile
+++ b/drivers/infiniband/hw/efa/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
-# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+# Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
#
# Makefile for Amazon Elastic Fabric Adapter (EFA) device driver.
#
obj-$(CONFIG_INFINIBAND_EFA) += efa.o
-efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o
+efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o efa_sysfs.o
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 96f9c3bc98b2..d332bc4edcb7 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_H_
@@ -69,6 +69,9 @@ struct efa_dev {
/* Only stores CQs with interrupts enabled */
struct xarray cqs_xa;
+ /* AH refcount tracking xarray and unique AH counter */
+ struct xarray ahs_xa;
+ atomic64_t ah_count;
};
struct efa_ucontext {
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 6c415b9adb5f..3c6fa5af941a 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/module.h>
@@ -12,6 +12,7 @@
#include <rdma/uverbs_ioctl.h>
#include "efa.h"
+#include "efa_sysfs.h"
#define PCI_DEV_ID_EFA0_VF 0xefa0
#define PCI_DEV_ID_EFA1_VF 0xefa1
@@ -561,6 +562,8 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
edev->dmadev = &pdev->dev;
dev->pdev = pdev;
xa_init(&dev->cqs_xa);
+ xa_init(&dev->ahs_xa);
+ atomic64_set(&dev->ah_count, 0);
pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
if (EFA_BASE_BAR_MASK & ~pci_mem_bars) {
@@ -619,8 +622,14 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
if (err)
goto err_free_mgmnt_irq;
+ err = efa_sysfs_init(dev);
+ if (err)
+ goto err_admin_destroy;
+
return dev;
+err_admin_destroy:
+ efa_com_admin_destroy(edev);
err_free_mgmnt_irq:
efa_free_irq(dev, &dev->admin_irq);
err_disable_msix:
@@ -645,6 +654,7 @@ static void efa_remove_device(struct pci_dev *pdev,
struct efa_com_dev *edev;
edev = &dev->edev;
+ efa_sysfs_destroy(dev);
efa_com_dev_reset(edev, reset_reason);
efa_com_admin_destroy(edev);
efa_free_irq(dev, &dev->admin_irq);
@@ -653,6 +663,7 @@ static void efa_remove_device(struct pci_dev *pdev,
devm_iounmap(&pdev->dev, edev->reg_bar);
efa_release_bars(dev, EFA_BASE_BAR_MASK);
xa_destroy(&dev->cqs_xa);
+ xa_destroy(&dev->ahs_xa);
ib_dealloc_device(&dev->ibdev);
pci_disable_device(pdev);
}
diff --git a/drivers/infiniband/hw/efa/efa_sysfs.c b/drivers/infiniband/hw/efa/efa_sysfs.c
new file mode 100644
index 000000000000..79602cf77424
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_sysfs.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/device.h>
+#include <linux/sysfs.h>
+
+#include "efa_sysfs.h"
+
+/*
+ * sysfs show callback for the "ah_count" attribute: emits the current value
+ * of the device's ah_count as a signed decimal followed by a newline.
+ */
+static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct efa_dev *efa_dev = pci_get_drvdata(pdev);
+	long long count = atomic64_read(&efa_dev->ah_count);
+
+	return sysfs_emit(buf, "%lld\n", count);
+}
+
+/* Read-only attribute; its show handler is ah_count_show() */
+static DEVICE_ATTR_RO(ah_count);
+
+/*
+ * Create the "ah_count" attribute file on the PCI device.
+ *
+ * Returns 0 on success, or the error returned by device_create_file() so
+ * that the caller's unwind path runs when the file cannot be created.
+ */
+int efa_sysfs_init(struct efa_dev *dev)
+{
+	struct device *device = &dev->pdev->dev;
+	int err;
+
+	err = device_create_file(device, &dev_attr_ah_count);
+	if (err)
+		dev_err(device, "Failed to create AH count sysfs file\n");
+
+	return err;
+}
+
+/* Remove the "ah_count" attribute file from the PCI device. */
+void efa_sysfs_destroy(struct efa_dev *dev)
+{
+	struct device *device = &dev->pdev->dev;
+
+	device_remove_file(device, &dev_attr_ah_count);
+}
diff --git a/drivers/infiniband/hw/efa/efa_sysfs.h b/drivers/infiniband/hw/efa/efa_sysfs.h
new file mode 100644
index 000000000000..fda3a885c150
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_sysfs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_SYSFS_H_
+#define _EFA_SYSFS_H_
+
+#include "efa.h"
+
+int efa_sysfs_init(struct efa_dev *dev);
+
+void efa_sysfs_destroy(struct efa_dev *dev);
+
+#endif /* _EFA_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 22d3e25c3b9d..1d8cb4a7f946 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/dma-buf.h>
@@ -2104,6 +2104,51 @@ int efa_mmap(struct ib_ucontext *ibucontext,
return __efa_mmap(dev, ucontext, vma);
}
+/*
+ * Take a reference on device address handle @ah in ahs_xa.
+ *
+ * Each index holds a refcount stored as an xarray value entry. ah_count is
+ * incremented only after a previously untracked handle has been stored
+ * successfully, so a failed store leaves the counter unchanged.
+ *
+ * Returns 0 on success or the negative errno reported by the store.
+ */
+static int efa_add_ah(struct efa_dev *dev, u16 ah)
+{
+	unsigned long refcount;
+	void *entry;
+	int err;
+
+	xa_lock(&dev->ahs_xa);
+	entry = xa_load(&dev->ahs_xa, ah);
+	refcount = entry ? xa_to_value(entry) : 0;
+
+	err = xa_err(__xa_store(&dev->ahs_xa, ah, xa_mk_value(refcount + 1), GFP_ATOMIC));
+	/* Count the handle only once it is actually tracked */
+	if (!err && refcount == 0)
+		atomic64_inc(&dev->ah_count);
+	xa_unlock(&dev->ahs_xa);
+
+	return err;
+}
+
+/*
+ * Drop one reference on device address handle @ah in ahs_xa.
+ *
+ * When the last reference goes away the entry is erased and ah_count is
+ * decremented; otherwise the lowered refcount is stored back. A handle with
+ * no entry is ignored and 0 is returned.
+ */
+static int efa_remove_ah(struct efa_dev *dev, u16 ah)
+{
+	unsigned long refs;
+	void *cur;
+	int ret = 0;
+
+	xa_lock(&dev->ahs_xa);
+	cur = xa_load(&dev->ahs_xa, ah);
+	refs = cur ? xa_to_value(cur) : 0;
+	/* AH already removed or never existed - unexpected but handle gracefully */
+	if (!refs)
+		goto out_unlock;
+
+	if (--refs) {
+		ret = xa_err(__xa_store(&dev->ahs_xa, ah, xa_mk_value(refs), GFP_ATOMIC));
+	} else {
+		ret = xa_err(__xa_erase(&dev->ahs_xa, ah));
+		atomic64_dec(&dev->ah_count);
+	}
+
+out_unlock:
+	xa_unlock(&dev->ahs_xa);
+	return ret;
+}
+
static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
struct efa_com_destroy_ah_params params = {
@@ -2150,6 +2195,10 @@ int efa_create_ah(struct ib_ah *ibah,
memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
ah->ah = result.ah;
+ err = efa_add_ah(dev, ah->ah);
+ if (err)
+ goto err_destroy_ah;
+
resp.efa_address_handle = result.ah;
if (udata->outlen) {
@@ -2158,13 +2207,15 @@ int efa_create_ah(struct ib_ah *ibah,
if (err) {
ibdev_dbg(&dev->ibdev,
"Failed to copy udata for create_ah response\n");
- goto err_destroy_ah;
+ goto err_remove_ah;
}
}
ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
return 0;
+err_remove_ah:
+ efa_remove_ah(dev, ah->ah);
err_destroy_ah:
efa_ah_destroy(dev, ah);
err_out:
@@ -2185,6 +2236,7 @@ int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
return -EOPNOTSUPP;
}
+ efa_remove_ah(dev, ah->ah);
efa_ah_destroy(dev, ah);
return 0;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-11 13:10 [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure Tom Sela
@ 2026-02-11 13:13 ` Jason Gunthorpe
2026-02-12 6:52 ` Gal Pressman
2026-02-11 18:48 ` yanjun.zhu
2026-02-12 6:52 ` Gal Pressman
2 siblings, 1 reply; 18+ messages in thread
From: Jason Gunthorpe @ 2026-02-11 13:13 UTC (permalink / raw)
To: Tom Sela
Cc: mrgolin, leon, linux-rdma, sleybo, matua, gal.pressman,
Yonatan Nachum
On Wed, Feb 11, 2026 at 01:10:48PM +0000, Tom Sela wrote:
> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
> +
> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
> +}
> +
> +static DEVICE_ATTR_RO(ah_count);
> +
> +int efa_sysfs_init(struct efa_dev *dev)
> +{
> + struct device *device = &dev->pdev->dev;
> +
> + if (device_create_file(device, &dev_attr_ah_count))
> + dev_err(device, "Failed to create AH count sysfs file\n");
This is not the right way to use sysfs in rdma drivers.
Also we have netlink counters as the preferred approach why are you
using sysfs?
Jason
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-11 13:10 [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure Tom Sela
2026-02-11 13:13 ` Jason Gunthorpe
@ 2026-02-11 18:48 ` yanjun.zhu
2026-02-12 6:52 ` Gal Pressman
2 siblings, 0 replies; 18+ messages in thread
From: yanjun.zhu @ 2026-02-11 18:48 UTC (permalink / raw)
To: Tom Sela, mrgolin, jgg, leon, linux-rdma
Cc: sleybo, matua, gal.pressman, Yonatan Nachum
On 2/11/26 5:10 AM, Tom Sela wrote:
> Add tracking of unique Address Handle usage to provide visibility into
> active AH resource consumption. The implementation uses a hash table to
It seems that xa is used instead of hash table in your commit?
Zhu Yanjun
> deduplicate identical AH requests that receive the same handle, ensuring
> accurate resource counting.
>
> The counter will be exposed via sysfs device attribute.
>
> Reviewed-by: Michael Margolin <mrgolin@amazon.com>
> Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
> Signed-off-by: Tom Sela <tomsela@amazon.com>
> ---
> drivers/infiniband/hw/efa/Makefile | 4 +-
> drivers/infiniband/hw/efa/efa.h | 5 ++-
> drivers/infiniband/hw/efa/efa_main.c | 13 ++++++-
> drivers/infiniband/hw/efa/efa_sysfs.c | 33 ++++++++++++++++
> drivers/infiniband/hw/efa/efa_sysfs.h | 15 +++++++
> drivers/infiniband/hw/efa/efa_verbs.c | 56 ++++++++++++++++++++++++++-
> 6 files changed, 120 insertions(+), 6 deletions(-)
> create mode 100644 drivers/infiniband/hw/efa/efa_sysfs.c
> create mode 100644 drivers/infiniband/hw/efa/efa_sysfs.h
>
> diff --git a/drivers/infiniband/hw/efa/Makefile b/drivers/infiniband/hw/efa/Makefile
> index 6e83083af0bc..0ba04eab17a1 100644
> --- a/drivers/infiniband/hw/efa/Makefile
> +++ b/drivers/infiniband/hw/efa/Makefile
> @@ -1,9 +1,9 @@
> # SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
> -# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
> +# Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
> #
> # Makefile for Amazon Elastic Fabric Adapter (EFA) device driver.
> #
>
> obj-$(CONFIG_INFINIBAND_EFA) += efa.o
>
> -efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o
> +efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o efa_sysfs.o
> diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
> index 96f9c3bc98b2..d332bc4edcb7 100644
> --- a/drivers/infiniband/hw/efa/efa.h
> +++ b/drivers/infiniband/hw/efa/efa.h
> @@ -1,6 +1,6 @@
> /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
> /*
> - * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
> + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
> */
>
> #ifndef _EFA_H_
> @@ -69,6 +69,9 @@ struct efa_dev {
>
> /* Only stores CQs with interrupts enabled */
> struct xarray cqs_xa;
> + /* AH tracking xarray and counter*/
> + struct xarray ahs_xa;
> + atomic64_t ah_count;
> };
>
> struct efa_ucontext {
> diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
> index 6c415b9adb5f..3c6fa5af941a 100644
> --- a/drivers/infiniband/hw/efa/efa_main.c
> +++ b/drivers/infiniband/hw/efa/efa_main.c
> @@ -1,6 +1,6 @@
> // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
> /*
> - * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
> + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
> */
>
> #include <linux/module.h>
> @@ -12,6 +12,7 @@
> #include <rdma/uverbs_ioctl.h>
>
> #include "efa.h"
> +#include "efa_sysfs.h"
>
> #define PCI_DEV_ID_EFA0_VF 0xefa0
> #define PCI_DEV_ID_EFA1_VF 0xefa1
> @@ -561,6 +562,8 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
> edev->dmadev = &pdev->dev;
> dev->pdev = pdev;
> xa_init(&dev->cqs_xa);
> + xa_init(&dev->ahs_xa);
> + atomic64_set(&dev->ah_count, 0);
>
> pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
> if (EFA_BASE_BAR_MASK & ~pci_mem_bars) {
> @@ -619,8 +622,14 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
> if (err)
> goto err_free_mgmnt_irq;
>
> + err = efa_sysfs_init(dev);
> + if (err)
> + goto err_admin_destroy;
> +
> return dev;
>
> +err_admin_destroy:
> + efa_com_admin_destroy(edev);
> err_free_mgmnt_irq:
> efa_free_irq(dev, &dev->admin_irq);
> err_disable_msix:
> @@ -645,6 +654,7 @@ static void efa_remove_device(struct pci_dev *pdev,
> struct efa_com_dev *edev;
>
> edev = &dev->edev;
> + efa_sysfs_destroy(dev);
> efa_com_dev_reset(edev, reset_reason);
> efa_com_admin_destroy(edev);
> efa_free_irq(dev, &dev->admin_irq);
> @@ -653,6 +663,7 @@ static void efa_remove_device(struct pci_dev *pdev,
> devm_iounmap(&pdev->dev, edev->reg_bar);
> efa_release_bars(dev, EFA_BASE_BAR_MASK);
> xa_destroy(&dev->cqs_xa);
> + xa_destroy(&dev->ahs_xa);
> ib_dealloc_device(&dev->ibdev);
> pci_disable_device(pdev);
> }
> diff --git a/drivers/infiniband/hw/efa/efa_sysfs.c b/drivers/infiniband/hw/efa/efa_sysfs.c
> new file mode 100644
> index 000000000000..79602cf77424
> --- /dev/null
> +++ b/drivers/infiniband/hw/efa/efa_sysfs.c
> @@ -0,0 +1,33 @@
> +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
> +/*
> + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
> + */
> +
> +#include <linux/device.h>
> +#include <linux/sysfs.h>
> +
> +#include "efa_sysfs.h"
> +
> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
> +
> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
> +}
> +
> +static DEVICE_ATTR_RO(ah_count);
> +
> +int efa_sysfs_init(struct efa_dev *dev)
> +{
> + struct device *device = &dev->pdev->dev;
> +
> + if (device_create_file(device, &dev_attr_ah_count))
> + dev_err(device, "Failed to create AH count sysfs file\n");
> +
> + return 0;
> +}
> +
> +void efa_sysfs_destroy(struct efa_dev *dev)
> +{
> + device_remove_file(&dev->pdev->dev, &dev_attr_ah_count);
> +}
> diff --git a/drivers/infiniband/hw/efa/efa_sysfs.h b/drivers/infiniband/hw/efa/efa_sysfs.h
> new file mode 100644
> index 000000000000..fda3a885c150
> --- /dev/null
> +++ b/drivers/infiniband/hw/efa/efa_sysfs.h
> @@ -0,0 +1,15 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
> +/*
> + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
> + */
> +
> +#ifndef _EFA_SYSFS_H_
> +#define _EFA_SYSFS_H_
> +
> +#include "efa.h"
> +
> +int efa_sysfs_init(struct efa_dev *dev);
> +
> +void efa_sysfs_destroy(struct efa_dev *dev);
> +
> +#endif /* _EFA_SYSFS_H_ */
> diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
> index 22d3e25c3b9d..1d8cb4a7f946 100644
> --- a/drivers/infiniband/hw/efa/efa_verbs.c
> +++ b/drivers/infiniband/hw/efa/efa_verbs.c
> @@ -1,6 +1,6 @@
> // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> /*
> - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
> + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
> */
>
> #include <linux/dma-buf.h>
> @@ -2104,6 +2104,51 @@ int efa_mmap(struct ib_ucontext *ibucontext,
> return __efa_mmap(dev, ucontext, vma);
> }
>
> +static int efa_add_ah(struct efa_dev *dev, u16 ah)
> +{
> + unsigned long refcount;
> + void *entry;
> + int err;
> +
> + xa_lock(&dev->ahs_xa);
> + entry = xa_load(&dev->ahs_xa, ah);
> + refcount = entry ? xa_to_value(entry) : 0;
> + if (refcount == 0)
> + atomic64_inc(&dev->ah_count);
> +
> + err = xa_err(__xa_store(&dev->ahs_xa, ah, xa_mk_value(refcount + 1), GFP_ATOMIC));
> + xa_unlock(&dev->ahs_xa);
> +
> + return err;
> +}
> +
> +static int efa_remove_ah(struct efa_dev *dev, u16 ah)
> +{
> + unsigned long refcount;
> + void *entry;
> + int err;
> +
> + xa_lock(&dev->ahs_xa);
> + entry = xa_load(&dev->ahs_xa, ah);
> + refcount = entry ? xa_to_value(entry) : 0;
> + if (refcount == 0) {
> + /* AH already removed or never existed - unexpected but handle gracefully */
> + xa_unlock(&dev->ahs_xa);
> + return 0;
> + }
> +
> + refcount--;
> +
> + if (refcount == 0) {
> + err = xa_err(__xa_erase(&dev->ahs_xa, ah));
> + atomic64_dec(&dev->ah_count);
> + } else {
> + err = xa_err(__xa_store(&dev->ahs_xa, ah, xa_mk_value(refcount), GFP_ATOMIC));
> + }
> + xa_unlock(&dev->ahs_xa);
> + return err;
> +}
> +
> static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
> {
> struct efa_com_destroy_ah_params params = {
> @@ -2150,6 +2195,10 @@ int efa_create_ah(struct ib_ah *ibah,
> memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
> ah->ah = result.ah;
>
> + err = efa_add_ah(dev, ah->ah);
> + if (err)
> + goto err_destroy_ah;
> +
> resp.efa_address_handle = result.ah;
>
> if (udata->outlen) {
> @@ -2158,13 +2207,15 @@ int efa_create_ah(struct ib_ah *ibah,
> if (err) {
> ibdev_dbg(&dev->ibdev,
> "Failed to copy udata for create_ah response\n");
> - goto err_destroy_ah;
> + goto err_remove_ah;
> }
> }
> ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
>
> return 0;
>
> +err_remove_ah:
> + efa_remove_ah(dev, ah->ah);
> err_destroy_ah:
> efa_ah_destroy(dev, ah);
> err_out:
> @@ -2185,6 +2236,7 @@ int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
> return -EOPNOTSUPP;
> }
>
> + efa_remove_ah(dev, ah->ah);
> efa_ah_destroy(dev, ah);
> return 0;
> }
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-11 13:10 [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure Tom Sela
2026-02-11 13:13 ` Jason Gunthorpe
2026-02-11 18:48 ` yanjun.zhu
@ 2026-02-12 6:52 ` Gal Pressman
2 siblings, 0 replies; 18+ messages in thread
From: Gal Pressman @ 2026-02-12 6:52 UTC (permalink / raw)
To: Tom Sela, mrgolin, jgg, leon, linux-rdma; +Cc: sleybo, matua, Yonatan Nachum
On 11/02/2026 15:10, Tom Sela wrote:
> +static int efa_add_ah(struct efa_dev *dev, u16 ah)
> +{
> + unsigned long refcount;
> + void *entry;
> + int err;
> +
> + xa_lock(&dev->ahs_xa);
> + entry = xa_load(&dev->ahs_xa, ah);
> + refcount = entry ? xa_to_value(entry) : 0;
> + if (refcount == 0)
> + atomic64_inc(&dev->ah_count);
What happens to this increment if the store fails?
> +
> + err = xa_err(__xa_store(&dev->ahs_xa, ah, xa_mk_value(refcount + 1), GFP_ATOMIC));
> + xa_unlock(&dev->ahs_xa);
> +
> + return err;
> +}
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-11 13:13 ` Jason Gunthorpe
@ 2026-02-12 6:52 ` Gal Pressman
2026-02-12 16:36 ` Leon Romanovsky
0 siblings, 1 reply; 18+ messages in thread
From: Gal Pressman @ 2026-02-12 6:52 UTC (permalink / raw)
To: Jason Gunthorpe, Tom Sela
Cc: mrgolin, leon, linux-rdma, sleybo, matua, Yonatan Nachum
On 11/02/2026 15:13, Jason Gunthorpe wrote:
> On Wed, Feb 11, 2026 at 01:10:48PM +0000, Tom Sela wrote:
>> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
>> +{
>> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
>> +
>> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
>> +}
>> +
>> +static DEVICE_ATTR_RO(ah_count);
>> +
>> +int efa_sysfs_init(struct efa_dev *dev)
>> +{
>> + struct device *device = &dev->pdev->dev;
>> +
>> + if (device_create_file(device, &dev_attr_ah_count))
>> + dev_err(device, "Failed to create AH count sysfs file\n");
>
> This is not the right way to use sysfs in rdma drivers.
>
> Also we have netlink counters as the prefered approach why are you
> using sysfs?
Yes, and EFA already supports stats reporting, the sysfs choice is strange..
BTW, isn't this something that can be added to restrack?
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-12 6:52 ` Gal Pressman
@ 2026-02-12 16:36 ` Leon Romanovsky
2026-02-15 13:41 ` Michael Margolin
0 siblings, 1 reply; 18+ messages in thread
From: Leon Romanovsky @ 2026-02-12 16:36 UTC (permalink / raw)
To: Gal Pressman
Cc: Jason Gunthorpe, Tom Sela, mrgolin, linux-rdma, sleybo, matua,
Yonatan Nachum
On Thu, Feb 12, 2026 at 08:52:41AM +0200, Gal Pressman wrote:
> On 11/02/2026 15:13, Jason Gunthorpe wrote:
> > On Wed, Feb 11, 2026 at 01:10:48PM +0000, Tom Sela wrote:
> >> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
> >> +{
> >> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
> >> +
> >> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
> >> +}
> >> +
> >> +static DEVICE_ATTR_RO(ah_count);
> >> +
> >> +int efa_sysfs_init(struct efa_dev *dev)
> >> +{
> >> + struct device *device = &dev->pdev->dev;
> >> +
> >> + if (device_create_file(device, &dev_attr_ah_count))
> >> + dev_err(device, "Failed to create AH count sysfs file\n");
> >
> > This is not the right way to use sysfs in rdma drivers.
> >
> > Also we have netlink counters as the prefered approach why are you
> > using sysfs?
>
> Yes, and EFA already supports stats reporting, the sysfs choice is strange..
>
> BTW, isn't this something that can be added to restrack?
Unlikely. Most drivers that implement such counters were written long before
bpftrace became widely used. I don't think modern drivers should carry these
counters, as they are trivial to collect without requiring any kernel changes.
This is especially true for EFA, which does not support kverbs.
Thanks.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-12 16:36 ` Leon Romanovsky
@ 2026-02-15 13:41 ` Michael Margolin
2026-02-15 17:15 ` Leon Romanovsky
0 siblings, 1 reply; 18+ messages in thread
From: Michael Margolin @ 2026-02-15 13:41 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Gal Pressman, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Thu, Feb 12, 2026 at 06:36:28PM +0200, Leon Romanovsky wrote:
> On Thu, Feb 12, 2026 at 08:52:41AM +0200, Gal Pressman wrote:
> > On 11/02/2026 15:13, Jason Gunthorpe wrote:
> > > On Wed, Feb 11, 2026 at 01:10:48PM +0000, Tom Sela wrote:
> > >> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
> > >> +{
> > >> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
> > >> +
> > >> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
> > >> +}
> > >> +
> > >> +static DEVICE_ATTR_RO(ah_count);
> > >> +
> > >> +int efa_sysfs_init(struct efa_dev *dev)
> > >> +{
> > >> + struct device *device = &dev->pdev->dev;
> > >> +
> > >> + if (device_create_file(device, &dev_attr_ah_count))
> > >> + dev_err(device, "Failed to create AH count sysfs file\n");
> > >
> > > This is not the right way to use sysfs in rdma drivers.
> > >
> > > Also we have netlink counters as the prefered approach why are you
> > > using sysfs?
> >
> > Yes, and EFA already supports stats reporting, the sysfs choice is strange..
> >
> > BTW, isn't this something that can be added to restrack?
>
> Unlikely. Most drivers that implement such counters were written long before
> bpftrace became widely used. I don't think modern drivers should carry these
> counters, as they are trivial to collect without requiring any kernel changes.
> This is especially true for EFA, which does not support kverbs.
>
> Thanks.
This approach was selected since this case doesn't naturally fit any of
the suggested ideas. It represents usage level of device AH objects
which might be different than the number of kernel objects as usually
covered by restrack count. Stats also doesn't seem as the right place
for this.
In a followup series we will suggest netlink counters extension to
support driver specific resources.
Michael
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-15 13:41 ` Michael Margolin
@ 2026-02-15 17:15 ` Leon Romanovsky
2026-02-15 17:23 ` Gal Pressman
2026-02-16 8:48 ` Michael Margolin
0 siblings, 2 replies; 18+ messages in thread
From: Leon Romanovsky @ 2026-02-15 17:15 UTC (permalink / raw)
To: Michael Margolin
Cc: Gal Pressman, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Sun, Feb 15, 2026 at 01:41:22PM +0000, Michael Margolin wrote:
> On Thu, Feb 12, 2026 at 06:36:28PM +0200, Leon Romanovsky wrote:
> > On Thu, Feb 12, 2026 at 08:52:41AM +0200, Gal Pressman wrote:
> > > On 11/02/2026 15:13, Jason Gunthorpe wrote:
> > > > On Wed, Feb 11, 2026 at 01:10:48PM +0000, Tom Sela wrote:
> > > >> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
> > > >> +{
> > > >> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
> > > >> +
> > > >> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
> > > >> +}
> > > >> +
> > > >> +static DEVICE_ATTR_RO(ah_count);
> > > >> +
> > > >> +int efa_sysfs_init(struct efa_dev *dev)
> > > >> +{
> > > >> + struct device *device = &dev->pdev->dev;
> > > >> +
> > > >> + if (device_create_file(device, &dev_attr_ah_count))
> > > >> + dev_err(device, "Failed to create AH count sysfs file\n");
> > > >
> > > > This is not the right way to use sysfs in rdma drivers.
> > > >
> > > > Also we have netlink counters as the prefered approach why are you
> > > > using sysfs?
> > >
> > > Yes, and EFA already supports stats reporting, the sysfs choice is strange..
> > >
> > > BTW, isn't this something that can be added to restrack?
> >
> > Unlikely. Most drivers that implement such counters were written long before
> > bpftrace became widely used. I don't think modern drivers should carry these
> > counters, as they are trivial to collect without requiring any kernel changes.
> > This is especially true for EFA, which does not support kverbs.
> >
> > Thanks.
>
>
> This approach was selected since this case doesn't naturally fit any of
> the suggested ideas. It represents usage level of device AH objects
> which might be different than the number of kernel objects as usually
> covered by restrack count. Stats also doesn't seem as the right place
> for this.
How can the kernel and this new counter report a different number of AH
objects?
>
> In a followup series we will suggest netlink counters extension to
> support driver specific resources.
bpftrace is generally the right tool, unless you can detail why it does not
fit your specific debugging scenario.
Thanks
>
> Michael
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-15 17:15 ` Leon Romanovsky
@ 2026-02-15 17:23 ` Gal Pressman
2026-02-15 17:57 ` Leon Romanovsky
2026-02-16 8:48 ` Michael Margolin
1 sibling, 1 reply; 18+ messages in thread
From: Gal Pressman @ 2026-02-15 17:23 UTC (permalink / raw)
To: Leon Romanovsky, Michael Margolin
Cc: Jason Gunthorpe, Tom Sela, linux-rdma, sleybo, matua,
Yonatan Nachum
On 15/02/2026 19:15, Leon Romanovsky wrote:
>> Stats also doesn't seem as the right place
>> for this.
Because?
>
> How can the kernel and this new counter report a different number of AH
> objects?
>
>>
>> In a followup series we will suggest netlink counters extension to
>> support driver specific resources.
>
> bpftrace is generally the right tool, unless you can detail why it does not
> fit your specific debugging scenario.
I don't understand, how do you use bpftrace for this use case?
Once you get to debug a system in a certain state, bpftrace won't help
you see events that happened in the past. You won't be able to know how
many AH were created.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-15 17:23 ` Gal Pressman
@ 2026-02-15 17:57 ` Leon Romanovsky
2026-02-16 11:08 ` Michael Margolin
0 siblings, 1 reply; 18+ messages in thread
From: Leon Romanovsky @ 2026-02-15 17:57 UTC (permalink / raw)
To: Gal Pressman
Cc: Michael Margolin, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Sun, Feb 15, 2026 at 07:23:41PM +0200, Gal Pressman wrote:
> On 15/02/2026 19:15, Leon Romanovsky wrote:
> >> Stats also doesn't seem as the right place
> >> for this.
>
> Because?
>
> >
> > How can the kernel and this new counter report a different number of AH
> > objects?
> >
> >>
> >> In a followup series we will suggest netlink counters extension to
> >> support driver specific resources.
> >
> > bpftrace is generally the right tool, unless you can detail why it does not
> > fit your specific debugging scenario.
>
> I don't understand, how do you use bpftrace for this use case?
>
> Once you get to debug a system in a certain state, bpftrace won't help
> you see events that happened in the past. You won't be able to know how
> many AH were created.
Their proposed counter can be implemented by counting calls to
efa_com_create_ah minus calls to efa_com_destroy_ah.
You have two ways to get it:
1. run bpftrace with your reproducer
2. check FW to get their internal counter
Thanks
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-15 17:15 ` Leon Romanovsky
2026-02-15 17:23 ` Gal Pressman
@ 2026-02-16 8:48 ` Michael Margolin
2026-02-16 9:41 ` Leon Romanovsky
1 sibling, 1 reply; 18+ messages in thread
From: Michael Margolin @ 2026-02-16 8:48 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Gal Pressman, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Sun, Feb 15, 2026 at 07:15:43PM +0200, Leon Romanovsky wrote:
> On Sun, Feb 15, 2026 at 01:41:22PM +0000, Michael Margolin wrote:
> > On Thu, Feb 12, 2026 at 06:36:28PM +0200, Leon Romanovsky wrote:
> > > On Thu, Feb 12, 2026 at 08:52:41AM +0200, Gal Pressman wrote:
> > > > On 11/02/2026 15:13, Jason Gunthorpe wrote:
> > > > > On Wed, Feb 11, 2026 at 01:10:48PM +0000, Tom Sela wrote:
> > > > >> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
> > > > >> +{
> > > > >> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
> > > > >> +
> > > > >> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
> > > > >> +}
> > > > >> +
> > > > >> +static DEVICE_ATTR_RO(ah_count);
> > > > >> +
> > > > >> +int efa_sysfs_init(struct efa_dev *dev)
> > > > >> +{
> > > > >> + struct device *device = &dev->pdev->dev;
> > > > >> +
> > > > >> + if (device_create_file(device, &dev_attr_ah_count))
> > > > >> + dev_err(device, "Failed to create AH count sysfs file\n");
> > > > >
> > > > > This is not the right way to use sysfs in rdma drivers.
> > > > >
> > > > > Also we have netlink counters as the prefered approach why are you
> > > > > using sysfs?
> > > >
> > > > Yes, and EFA already supports stats reporting, the sysfs choice is strange..
> > > >
> > > > BTW, isn't this something that can be added to restrack?
> > >
> > > Unlikely. Most drivers that implement such counters were written long before
> > > bpftrace became widely used. I don't think modern drivers should carry these
> > > counters, as they are trivial to collect without requiring any kernel changes.
> > > This is especially true for EFA, which does not support kverbs.
> > >
> > > Thanks.
> >
> >
> > This approach was selected since this case doesn't naturally fit any of
> > the suggested ideas. It represents usage level of device AH objects
> > which might be different than the number of kernel objects as usually
> > covered by restrack count. Stats also doesn't seem as the right place
> > for this.
>
> How can the kernel and this new counter report a different number of AH
> objects?
When an application creates multiple AH objects for the same peer, the
device reuses the existing resource and returns its AH number. The new
counter counts unique AH numbers and thus represents the amount of device
resources currently in use.
>
> >
> > In a followup series we will suggest netlink counters extension to
> > support driver specific resources.
>
> bpftrace is generally the right tool, unless you can detail why it does not
> fit your specific debugging scenario.
This isn't intended for internal debug use; it is aimed at end users, to
let them track their applications' usage of a limited device resource.
Michael
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-16 8:48 ` Michael Margolin
@ 2026-02-16 9:41 ` Leon Romanovsky
0 siblings, 0 replies; 18+ messages in thread
From: Leon Romanovsky @ 2026-02-16 9:41 UTC (permalink / raw)
To: Michael Margolin
Cc: Gal Pressman, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Mon, Feb 16, 2026 at 08:48:19AM +0000, Michael Margolin wrote:
> On Sun, Feb 15, 2026 at 07:15:43PM +0200, Leon Romanovsky wrote:
> > On Sun, Feb 15, 2026 at 01:41:22PM +0000, Michael Margolin wrote:
> > > On Thu, Feb 12, 2026 at 06:36:28PM +0200, Leon Romanovsky wrote:
> > > > On Thu, Feb 12, 2026 at 08:52:41AM +0200, Gal Pressman wrote:
> > > > > On 11/02/2026 15:13, Jason Gunthorpe wrote:
> > > > > > On Wed, Feb 11, 2026 at 01:10:48PM +0000, Tom Sela wrote:
> > > > > >> +static ssize_t ah_count_show(struct device *dev, struct device_attribute *attr, char *buf)
> > > > > >> +{
> > > > > >> + struct efa_dev *efa_dev = pci_get_drvdata(to_pci_dev(dev));
> > > > > >> +
> > > > > >> + return sysfs_emit(buf, "%lld\n", atomic64_read(&efa_dev->ah_count));
> > > > > >> +}
> > > > > >> +
> > > > > >> +static DEVICE_ATTR_RO(ah_count);
> > > > > >> +
> > > > > >> +int efa_sysfs_init(struct efa_dev *dev)
> > > > > >> +{
> > > > > >> + struct device *device = &dev->pdev->dev;
> > > > > >> +
> > > > > >> + if (device_create_file(device, &dev_attr_ah_count))
> > > > > >> + dev_err(device, "Failed to create AH count sysfs file\n");
> > > > > >
> > > > > > This is not the right way to use sysfs in rdma drivers.
> > > > > >
> > > > > > Also we have netlink counters as the prefered approach why are you
> > > > > > using sysfs?
> > > > >
> > > > > Yes, and EFA already supports stats reporting, the sysfs choice is strange..
> > > > >
> > > > > BTW, isn't this something that can be added to restrack?
> > > >
> > > > Unlikely. Most drivers that implement such counters were written long before
> > > > bpftrace became widely used. I don't think modern drivers should carry these
> > > > counters, as they are trivial to collect without requiring any kernel changes.
> > > > This is especially true for EFA, which does not support kverbs.
> > > >
> > > > Thanks.
> > >
> > >
> > > This approach was selected since this case doesn't naturally fit any of
> > > the suggested ideas. It represents usage level of device AH objects
> > > which might be different than the number of kernel objects as usually
> > > covered by restrack count. Stats also doesn't seem as the right place
> > > for this.
> >
> > How can the kernel and this new counter report a different number of AH
> > objects?
>
> When application creates multiple AH objects for same peer, the device
> reuses the existing resource and returns its AH number. The new counter
> counts unique AH numbers thus represents the amount of device resources
> currently in use.
https://lore.kernel.org/all/20260215175707.GC12989@unreal
Thanks
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-15 17:57 ` Leon Romanovsky
@ 2026-02-16 11:08 ` Michael Margolin
2026-02-16 11:22 ` Leon Romanovsky
0 siblings, 1 reply; 18+ messages in thread
From: Michael Margolin @ 2026-02-16 11:08 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Gal Pressman, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Sun, Feb 15, 2026 at 07:57:07PM +0200, Leon Romanovsky wrote:
> On Sun, Feb 15, 2026 at 07:23:41PM +0200, Gal Pressman wrote:
> > On 15/02/2026 19:15, Leon Romanovsky wrote:
> > >> Stats also doesn't seem as the right place
> > >> for this.
> >
> > Because?
> >
> > >
> > > How can the kernel and this new counter report a different number of AH
> > > objects?
> > >
> > >>
> > >> In a followup series we will suggest netlink counters extension to
> > >> support driver specific resources.
> > >
> > > bpftrace is generally the right tool, unless you can detail why it does not
> > > fit your specific debugging scenario.
> >
> > I don't understand, how do you use bpftrace for this use case?
> >
> > Once you get to debug a system in a certain state, bpftrace won't help
> > you see events that happened in the past. You won't be able to know how
> > many AH were created.
>
> Their proposed counter can be implemented by counting calls to
> efa_com_create_ah minus calls to efa_com_destroy_ah.
>
> You have two ways to get it:
> 1. run bfptrace with your reproducer
> 2. check FW to get their internal counter
>
Calls to efa_com_create_ah minus calls to efa_com_destroy_ah will not
always result in the correct number of consumed device resources, as
multiple calls to efa_com_create_ah can return the same AH number.
Additionally we are looking to expose this info to customers without
requiring a kernel rebuild or the use of debug tools, similar to how
device and port statistics can be read in sysfs or through the rdma
tool.
Michael
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-16 11:08 ` Michael Margolin
@ 2026-02-16 11:22 ` Leon Romanovsky
2026-02-17 14:54 ` Michael Margolin
0 siblings, 1 reply; 18+ messages in thread
From: Leon Romanovsky @ 2026-02-16 11:22 UTC (permalink / raw)
To: Michael Margolin
Cc: Gal Pressman, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Mon, Feb 16, 2026 at 11:08:53AM +0000, Michael Margolin wrote:
> On Sun, Feb 15, 2026 at 07:57:07PM +0200, Leon Romanovsky wrote:
> > On Sun, Feb 15, 2026 at 07:23:41PM +0200, Gal Pressman wrote:
> > > On 15/02/2026 19:15, Leon Romanovsky wrote:
> > > >> Stats also doesn't seem as the right place
> > > >> for this.
> > >
> > > Because?
> > >
> > > >
> > > > How can the kernel and this new counter report a different number of AH
> > > > objects?
> > > >
> > > >>
> > > >> In a followup series we will suggest netlink counters extension to
> > > >> support driver specific resources.
> > > >
> > > > bpftrace is generally the right tool, unless you can detail why it does not
> > > > fit your specific debugging scenario.
> > >
> > > I don't understand, how do you use bpftrace for this use case?
> > >
> > > Once you get to debug a system in a certain state, bpftrace won't help
> > > you see events that happened in the past. You won't be able to know how
> > > many AH were created.
> >
> > Their proposed counter can be implemented by counting calls to
> > efa_com_create_ah minus calls to efa_com_destroy_ah.
> >
> > You have two ways to get it:
> > 1. run bfptrace with your reproducer
> > 2. check FW to get their internal counter
> >
>
> Calls to efa_com_create_ah minus calls to efa_com_destroy_ah will not
> always result in correct number of consumed device resources as multiple
> calls to efa_com_create_ah can return the same AH number.
bpftrace supports map and can count unique ids.
>
> Additionally we are looking to expose this info to customers without
> requiring a kernel rebuild or the use of debug tools, similar to how
> device and port statistics can be read in sysfs or through the rdma
> tool.
BPF doesn't require any kernel rebuild. It works out-of-the-box on even
old kernels.
Thanks
>
> Michael
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-16 11:22 ` Leon Romanovsky
@ 2026-02-17 14:54 ` Michael Margolin
2026-02-18 0:14 ` Jason Gunthorpe
0 siblings, 1 reply; 18+ messages in thread
From: Michael Margolin @ 2026-02-17 14:54 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Gal Pressman, Jason Gunthorpe, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Mon, Feb 16, 2026 at 01:22:07PM +0200, Leon Romanovsky wrote:
> On Mon, Feb 16, 2026 at 11:08:53AM +0000, Michael Margolin wrote:
> > On Sun, Feb 15, 2026 at 07:57:07PM +0200, Leon Romanovsky wrote:
> > > On Sun, Feb 15, 2026 at 07:23:41PM +0200, Gal Pressman wrote:
> > > > On 15/02/2026 19:15, Leon Romanovsky wrote:
> > > > >> Stats also doesn't seem as the right place
> > > > >> for this.
> > > >
> > > > Because?
> > > >
> > > > >
> > > > > How can the kernel and this new counter report a different number of AH
> > > > > objects?
> > > > >
> > > > >>
> > > > >> In a followup series we will suggest netlink counters extension to
> > > > >> support driver specific resources.
> > > > >
> > > > > bpftrace is generally the right tool, unless you can detail why it does not
> > > > > fit your specific debugging scenario.
> > > >
> > > > I don't understand, how do you use bpftrace for this use case?
> > > >
> > > > Once you get to debug a system in a certain state, bpftrace won't help
> > > > you see events that happened in the past. You won't be able to know how
> > > > many AH were created.
> > >
> > > Their proposed counter can be implemented by counting calls to
> > > efa_com_create_ah minus calls to efa_com_destroy_ah.
> > >
> > > You have two ways to get it:
> > > 1. run bfptrace with your reproducer
> > > 2. check FW to get their internal counter
> > >
> >
> > Calls to efa_com_create_ah minus calls to efa_com_destroy_ah will not
> > always result in correct number of consumed device resources as multiple
> > calls to efa_com_create_ah can return the same AH number.
>
> bpftrace supports map and can count unique ids.
>
> >
> > Additionally we are looking to expose this info to customers without
> > requiring a kernel rebuild or the use of debug tools, similar to how
> > device and port statistics can be read in sysfs or through the rdma
> > tool.
>
> BPF doesn't require any kernel rebuild. It works out-of-the-box on even
> old kernels.
I'll try to give a higher-level overview of the need.
I don't argue that bpftrace is a powerful debug tool that can be used to
collect various info from the kernel, including this one and other data
that we expose through sysfs, netlink, and rdma-core. This being said, I
don't think it should be considered as a replacement for an explicit and
stable kernel ABI.
What the proposed change is trying to do is give end users the ability
to monitor AH usage in production. Resource usage and traffic counters
are usually collected periodically (every 1-5 seconds) by dedicated
collectors (e.g., Prometheus node exporter). Such user processes can't
depend on in-kernel function names or any internal logic that might
change from time to time, and expect well-defined and solid interfaces.
I don't see how bpftrace can serve this need.
Michael
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-17 14:54 ` Michael Margolin
@ 2026-02-18 0:14 ` Jason Gunthorpe
2026-02-18 9:15 ` Leon Romanovsky
0 siblings, 1 reply; 18+ messages in thread
From: Jason Gunthorpe @ 2026-02-18 0:14 UTC (permalink / raw)
To: Michael Margolin
Cc: Leon Romanovsky, Gal Pressman, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Tue, Feb 17, 2026 at 02:54:26PM +0000, Michael Margolin wrote:
> to monitor AH usage in production. Resource usage and traffic counters
> are usually collected periodically (every 1-5 seconds) by dedicated
> collectors (e.g., Prometheus node exporter).
Can you just have two simple stats
'# HW AHs created'
'# HW AHs destroyed'
and the value you want is the simple difference?
Jason
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-18 0:14 ` Jason Gunthorpe
@ 2026-02-18 9:15 ` Leon Romanovsky
2026-02-18 17:27 ` Michael Margolin
0 siblings, 1 reply; 18+ messages in thread
From: Leon Romanovsky @ 2026-02-18 9:15 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Michael Margolin, Gal Pressman, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Tue, Feb 17, 2026 at 08:14:08PM -0400, Jason Gunthorpe wrote:
> On Tue, Feb 17, 2026 at 02:54:26PM +0000, Michael Margolin wrote:
> > to monitor AH usage in production. Resource usage and traffic counters
> > are usually collected periodically (every 1-5 seconds) by dedicated
> > collectors (e.g., Prometheus node exporter).
>
> Can you just have two simple stats
> '# HW AHs created'
> '# HW AHs destroyed'
>
> and the value you want is the simple difference?
Which can be collected from FW through FWCTL?
I'm not super excited to see a slow, unique sysfs UAPI field in the RDMA
subsystem. They are interested in getting FW information, so let's use
the interfaces which are intended for it.
Thanks
>
> Jason
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure
2026-02-18 9:15 ` Leon Romanovsky
@ 2026-02-18 17:27 ` Michael Margolin
0 siblings, 0 replies; 18+ messages in thread
From: Michael Margolin @ 2026-02-18 17:27 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Jason Gunthorpe, Gal Pressman, Tom Sela, linux-rdma, sleybo,
matua, Yonatan Nachum
On Wed, Feb 18, 2026 at 11:15:59AM +0200, Leon Romanovsky wrote:
> On Tue, Feb 17, 2026 at 08:14:08PM -0400, Jason Gunthorpe wrote:
> > On Tue, Feb 17, 2026 at 02:54:26PM +0000, Michael Margolin wrote:
> > > to monitor AH usage in production. Resource usage and traffic counters
> > > are usually collected periodically (every 1-5 seconds) by dedicated
> > > collectors (e.g., Prometheus node exporter).
> >
> > Can you just have two simple stats
> > '# HW AHs created'
> > '# HW AHs destroyed'
> >
> > and the value you want is the simple difference?
I think that adding two separate counters as you suggest under
/sys/class/infiniband/*/hw_counters/ can work for us.
Will submit a new patch for this option.
>
> Which can be collected from FW through FWCTL?
>
> I don't super excited to see slow, unique sysfs UAPI field in RDMA
> subsystem. They are interested to get FW information, let's use
> interfaces which are intended for it.
>
> Thanks
Leon, the device currently doesn't expose this info and it's generated
in the driver.
Michael
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2026-02-18 17:27 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-11 13:10 [PATCH for-next] RDMA/efa: Add AH usage counter with sysfs exposure Tom Sela
2026-02-11 13:13 ` Jason Gunthorpe
2026-02-12 6:52 ` Gal Pressman
2026-02-12 16:36 ` Leon Romanovsky
2026-02-15 13:41 ` Michael Margolin
2026-02-15 17:15 ` Leon Romanovsky
2026-02-15 17:23 ` Gal Pressman
2026-02-15 17:57 ` Leon Romanovsky
2026-02-16 11:08 ` Michael Margolin
2026-02-16 11:22 ` Leon Romanovsky
2026-02-17 14:54 ` Michael Margolin
2026-02-18 0:14 ` Jason Gunthorpe
2026-02-18 9:15 ` Leon Romanovsky
2026-02-18 17:27 ` Michael Margolin
2026-02-16 8:48 ` Michael Margolin
2026-02-16 9:41 ` Leon Romanovsky
2026-02-11 18:48 ` yanjun.zhu
2026-02-12 6:52 ` Gal Pressman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox