* [PATCH rdma-next 1/4] net: mana: Probe rdma device in mana driver
2025-04-14 18:28 [PATCH rdma-next 0/4] RDMA/mana_ib: allow separate mana_ib for each mana client Konstantin Taranov
@ 2025-04-14 18:28 ` Konstantin Taranov
2025-04-14 18:28 ` [PATCH rdma-next 2/4] RDMA/mana_ib: Add support of mana_ib for RNIC and ETH nic Konstantin Taranov
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Konstantin Taranov @ 2025-04-14 18:28 UTC (permalink / raw)
To: kotaranov, pabeni, haiyangz, kys, edumazet, kuba, davem, decui,
wei.liu, longli, jgg, leon
Cc: linux-rdma, linux-kernel, netdev
From: Konstantin Taranov <kotaranov@microsoft.com>
Initialize the gdma device for RDMA inside the mana module.
For each gdma device, initialize an auxiliary ib device.
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
---
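Note for reviewers: the two new helpers reduce to the shape below (a
condensed sketch of the mana_en.c hunk; only the gating and the error
unwinding are kept):

int mana_rdma_probe(struct gdma_dev *gd)
{
	int err;

	/* The SoC exposes GDMA_DEVICE_MANA_IB only when RDMA is
	 * provisioned on this PCI function; otherwise this is a no-op,
	 * which is why all PCI paths (probe/remove/suspend/resume/
	 * shutdown) may call it unconditionally.
	 */
	if (gd->dev_id.type != GDMA_DEVICE_MANA_IB)
		return 0;

	err = mana_gd_register_device(gd);
	if (err)
		return err;

	err = add_adev(gd, "rdma");	/* creates the "mana.rdma" auxiliary device */
	if (err)
		mana_gd_deregister_device(gd);
	return err;
}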
.../net/ethernet/microsoft/mana/gdma_main.c | 16 +++++++-
drivers/net/ethernet/microsoft/mana/mana_en.c | 39 +++++++++++++++++--
include/net/mana/mana.h | 3 ++
3 files changed, 53 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index b5156d4..1caf73c 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -978,7 +978,6 @@ int mana_gd_register_device(struct gdma_dev *gd)
return 0;
}
-EXPORT_SYMBOL_NS(mana_gd_register_device, "NET_MANA");
int mana_gd_deregister_device(struct gdma_dev *gd)
{
@@ -1009,7 +1008,6 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
return err;
}
-EXPORT_SYMBOL_NS(mana_gd_deregister_device, "NET_MANA");
u32 mana_gd_wq_avail_space(struct gdma_queue *wq)
{
@@ -1541,8 +1539,15 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto cleanup_gd;
+
+ err = mana_rdma_probe(&gc->mana_ib);
+ if (err)
+ goto cleanup_mana;
+
return 0;
+cleanup_mana:
+ mana_remove(&gc->mana, false);
cleanup_gd:
mana_gd_cleanup(pdev);
unmap_bar:
@@ -1569,6 +1574,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
{
struct gdma_context *gc = pci_get_drvdata(pdev);
+ mana_rdma_remove(&gc->mana_ib);
mana_remove(&gc->mana, false);
mana_gd_cleanup(pdev);
@@ -1588,6 +1594,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
{
struct gdma_context *gc = pci_get_drvdata(pdev);
+ mana_rdma_remove(&gc->mana_ib);
mana_remove(&gc->mana, true);
mana_gd_cleanup(pdev);
@@ -1612,6 +1619,10 @@ static int mana_gd_resume(struct pci_dev *pdev)
if (err)
return err;
+ err = mana_rdma_probe(&gc->mana_ib);
+ if (err)
+ return err;
+
return 0;
}
@@ -1622,6 +1633,7 @@ static void mana_gd_shutdown(struct pci_dev *pdev)
dev_info(&pdev->dev, "Shutdown was called\n");
+ mana_rdma_remove(&gc->mana_ib);
mana_remove(&gc->mana, true);
mana_gd_cleanup(pdev);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 4e870b1..70c4955 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2936,7 +2936,7 @@ static void remove_adev(struct gdma_dev *gd)
gd->adev = NULL;
}
-static int add_adev(struct gdma_dev *gd)
+static int add_adev(struct gdma_dev *gd, const char *name)
{
struct auxiliary_device *adev;
struct mana_adev *madev;
@@ -2952,7 +2952,7 @@ static int add_adev(struct gdma_dev *gd)
goto idx_fail;
adev->id = ret;
- adev->name = "rdma";
+ adev->name = name;
adev->dev.parent = gd->gdma_context->dev;
adev->dev.release = adev_release;
madev->mdev = gd;
@@ -3064,7 +3064,7 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
}
}
- err = add_adev(gd);
+ err = add_adev(gd, "dpdk");
out:
if (err)
mana_remove(gd, false);
@@ -3131,6 +3131,39 @@ out:
kfree(ac);
}
+int mana_rdma_probe(struct gdma_dev *gd)
+{
+ int err = 0;
+
+ if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
+ /* RDMA device is not detected on pci */
+ return err;
+ }
+
+ err = mana_gd_register_device(gd);
+ if (err)
+ return err;
+
+ err = add_adev(gd, "rdma");
+ if (err)
+ mana_gd_deregister_device(gd);
+
+ return err;
+}
+
+void mana_rdma_remove(struct gdma_dev *gd)
+{
+ if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
+ /* RDMA device is not detected on pci */
+ return;
+ }
+
+ if (gd->adev)
+ remove_adev(gd);
+
+ mana_gd_deregister_device(gd);
+}
+
struct net_device *mana_get_primary_netdev(struct mana_context *ac,
u32 port_index,
netdevice_tracker *tracker)
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0f78065..5857efc 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -488,6 +488,9 @@ int mana_detach(struct net_device *ndev, bool from_close);
int mana_probe(struct gdma_dev *gd, bool resuming);
void mana_remove(struct gdma_dev *gd, bool suspending);
+int mana_rdma_probe(struct gdma_dev *gd);
+void mana_rdma_remove(struct gdma_dev *gd);
+
void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
u32 flags);
--
2.43.0
* [PATCH rdma-next 2/4] RDMA/mana_ib: Add support of mana_ib for RNIC and ETH nic
2025-04-14 18:28 [PATCH rdma-next 0/4] RDMA/mana_ib: allow separate mana_ib for each mana client Konstantin Taranov
2025-04-14 18:28 ` [PATCH rdma-next 1/4] net: mana: Probe rdma device in mana driver Konstantin Taranov
@ 2025-04-14 18:28 ` Konstantin Taranov
2025-04-14 18:28 ` [PATCH rdma-next 3/4] RDMA/mana_ib: unify mana_ib functions to support any gdma device Konstantin Taranov
2025-04-14 18:28 ` [PATCH rdma-next 4/4] net: mana: Add support for auxiliary device servicing events Konstantin Taranov
3 siblings, 0 replies; 8+ messages in thread
From: Konstantin Taranov @ 2025-04-14 18:28 UTC (permalink / raw)
To: kotaranov, pabeni, haiyangz, kys, edumazet, kuba, davem, decui,
wei.liu, longli, jgg, leon
Cc: linux-rdma, linux-kernel, netdev
From: Konstantin Taranov <kotaranov@microsoft.com>
Allow mana_ib to be created over the ethernet gdma device and
over the rnic gdma device. The HW has two devices with different
capabilities and different use-cases. Initialize the required
resources depending on the gdma device used.
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
---
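Note for reviewers: the split in mana_ib_probe() reduces to the sketch
below (shared setup and error unwinding trimmed; see the full hunk):

	if (mana_ib_is_rnic(dev)) {
		/* RNIC gdma device: single IB port bound to the primary
		 * netdev, netdevice notifier, RNIC caps, EQs, RNIC
		 * adapter and MAC configuration.
		 */
		dev->ib_dev.phys_port_cnt = 1;
	} else {
		/* ETH gdma device: one IB port per vPort, ETH caps only. */
		dev->ib_dev.phys_port_cnt = mc->num_ports;
		ret = mana_eth_query_adapter_caps(dev);
	}

	/* RNIC devices keep the "mana_%d" name; ETH devices register
	 * as "manae_%d".
	 */
	ret = ib_register_device(&dev->ib_dev,
				 mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
				 gc->dev);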
drivers/infiniband/hw/mana/device.c | 174 +++++++++++++--------------
drivers/infiniband/hw/mana/main.c | 49 +++++++-
drivers/infiniband/hw/mana/mana_ib.h | 6 +
3 files changed, 134 insertions(+), 95 deletions(-)
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index b310893..c484bed 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -101,103 +101,95 @@ static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+ struct gdma_context *gc = madev->mdev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
struct gdma_dev *mdev = madev->mdev;
struct net_device *ndev;
- struct mana_context *mc;
struct mana_ib_dev *dev;
u8 mac_addr[ETH_ALEN];
int ret;
- mc = mdev->driver_data;
-
dev = ib_alloc_device(mana_ib_dev, ib_dev);
if (!dev)
return -ENOMEM;
ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
-
- dev->ib_dev.phys_port_cnt = mc->num_ports;
-
- ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
- mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
-
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
-
- /*
- * num_comp_vectors needs to set to the max MSIX index
- * when interrupts and event queues are implemented
- */
- dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
- dev->ib_dev.dev.parent = mdev->gdma_context->dev;
-
- ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
- if (!ndev) {
- ret = -ENODEV;
- ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
- goto free_ib_device;
- }
- ether_addr_copy(mac_addr, ndev->dev_addr);
- addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
- ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
- /* mana_get_primary_netdev() returns ndev with refcount held */
- netdev_put(ndev, &dev->dev_tracker);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
- goto free_ib_device;
- }
-
- ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
- ret);
- goto free_ib_device;
- }
- dev->gdma_dev = &mdev->gdma_context->mana_ib;
-
- dev->nb.notifier_call = mana_ib_netdev_event;
- ret = register_netdevice_notifier(&dev->nb);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
- ret);
- goto deregister_device;
- }
-
- ret = mana_ib_gd_query_adapter_caps(dev);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
- ret);
- goto deregister_net_notifier;
- }
-
- ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
-
- ret = mana_ib_create_eqs(dev);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
- goto deregister_net_notifier;
- }
-
- ret = mana_ib_gd_create_rnic_adapter(dev);
- if (ret)
- goto destroy_eqs;
-
+ dev->ib_dev.num_comp_vectors = gc->max_num_queues;
+ dev->ib_dev.dev.parent = gc->dev;
+ dev->gdma_dev = mdev;
xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
- ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
- ret);
- goto destroy_rnic;
+
+ if (mana_ib_is_rnic(dev)) {
+ dev->ib_dev.phys_port_cnt = 1;
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ if (!ndev) {
+ ret = -ENODEV;
+ ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
+ goto free_ib_device;
+ }
+ ether_addr_copy(mac_addr, ndev->dev_addr);
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
+ ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+ ret);
+ goto free_ib_device;
+ }
+
+ ret = mana_ib_gd_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
+ ret = mana_ib_create_eqs(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ret = mana_ib_gd_create_rnic_adapter(dev);
+ if (ret)
+ goto destroy_eqs;
+
+ ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
+ goto destroy_rnic;
+ }
+ } else {
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+ ret = mana_eth_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
+ goto free_ib_device;
+ }
}
- dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
- MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
+ dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
+ MANA_AV_BUFFER_SIZE, 0);
if (!dev->av_pool) {
ret = -ENOMEM;
goto destroy_rnic;
}
- ret = ib_register_device(&dev->ib_dev, "mana_%d",
- mdev->gdma_context->dev);
+ ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+ mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+ ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
+ gc->dev);
if (ret)
goto deallocate_pool;
@@ -208,15 +200,16 @@ static int mana_ib_probe(struct auxiliary_device *adev,
deallocate_pool:
dma_pool_destroy(dev->av_pool);
destroy_rnic:
- xa_destroy(&dev->qp_table_wq);
- mana_ib_gd_destroy_rnic_adapter(dev);
+ if (mana_ib_is_rnic(dev))
+ mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
- mana_ib_destroy_eqs(dev);
+ if (mana_ib_is_rnic(dev))
+ mana_ib_destroy_eqs(dev);
deregister_net_notifier:
- unregister_netdevice_notifier(&dev->nb);
-deregister_device:
- mana_gd_deregister_device(dev->gdma_dev);
+ if (mana_ib_is_rnic(dev))
+ unregister_netdevice_notifier(&dev->nb);
free_ib_device:
+ xa_destroy(&dev->qp_table_wq);
ib_dealloc_device(&dev->ib_dev);
return ret;
}
@@ -227,25 +220,24 @@ static void mana_ib_remove(struct auxiliary_device *adev)
ib_unregister_device(&dev->ib_dev);
dma_pool_destroy(dev->av_pool);
+ if (mana_ib_is_rnic(dev)) {
+ mana_ib_gd_destroy_rnic_adapter(dev);
+ mana_ib_destroy_eqs(dev);
+ unregister_netdevice_notifier(&dev->nb);
+ }
xa_destroy(&dev->qp_table_wq);
- mana_ib_gd_destroy_rnic_adapter(dev);
- mana_ib_destroy_eqs(dev);
- unregister_netdevice_notifier(&dev->nb);
- mana_gd_deregister_device(dev->gdma_dev);
ib_dealloc_device(&dev->ib_dev);
}
static const struct auxiliary_device_id mana_id_table[] = {
- {
- .name = "mana.rdma",
- },
+ { .name = "mana.rdma", },
+ { .name = "mana.dpdk", },
{},
};
MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
static struct auxiliary_driver mana_driver = {
- .name = "rdma",
.probe = mana_ib_probe,
.remove = mana_ib_remove,
.id_table = mana_id_table,
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index a28b712..64526b8 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -551,6 +551,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
struct ib_port_attr attr;
int err;
@@ -560,10 +561,12 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
- if (port_num == 1) {
- immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ if (mana_ib_is_rnic(dev)) {
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
}
return 0;
@@ -597,12 +600,17 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
props->max_pkeys = 1;
props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
+ if (!mana_ib_is_rnic(dev)) {
+ props->raw_packet_caps = IB_RAW_PACKET_CAP_CVLAN_STRIPPING | IB_RAW_PACKET_CAP_IP_CSUM;
+ }
+
return 0;
}
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
if (!ndev)
@@ -623,7 +631,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
props->active_width = IB_WIDTH_4X;
props->active_speed = IB_SPEED_EDR;
props->pkey_tbl_len = 1;
- if (port == 1) {
+ if (mana_ib_is_rnic(dev)) {
props->gid_tbl_len = 16;
props->port_cap_flags = IB_PORT_CM_SUP;
props->ip_gids = true;
@@ -703,6 +711,36 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
return 0;
}
+int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct gdma_query_max_resources_resp resp = {};
+ struct gdma_general_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+ sizeof(req), sizeof(resp));
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
+ caps->max_cq_count = resp.max_cq;
+ caps->max_mr_count = resp.max_mst;
+ caps->max_pd_count = 0x6000;
+ caps->max_qp_wr = min_t(u32,
+ 0x100000 / GDMA_MAX_SQE_SIZE,
+ 0x100000 / GDMA_MAX_RQE_SIZE);
+ caps->max_send_sge_count = 30;
+ caps->max_recv_sge_count = 15;
+
+ return 0;
+}
+
static void
mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
{
@@ -921,6 +959,9 @@ int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 do
struct mana_rnic_create_cq_req req = {};
int err;
+ if (!mdev->eqs)
+ return -EINVAL;
+
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
req.hdr.dev_id = gc->mana_ib.dev_id;
req.adapter = mdev->adapter_handle;
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index f0dbd90..42bebd6 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -544,6 +544,11 @@ static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
complete(&qp->free);
}
+static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
+}
+
static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
{
struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
@@ -643,6 +648,7 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
int mana_ib_create_eqs(struct mana_ib_dev *mdev);
--
2.43.0
* [PATCH rdma-next 3/4] RDMA/mana_ib: unify mana_ib functions to support any gdma device
2025-04-14 18:28 [PATCH rdma-next 0/4] RDMA/mana_ib: allow separate mana_ib for each mana client Konstantin Taranov
2025-04-14 18:28 ` [PATCH rdma-next 1/4] net: mana: Probe rdma device in mana driver Konstantin Taranov
2025-04-14 18:28 ` [PATCH rdma-next 2/4] RDMA/mana_ib: Add support of mana_ib for RNIC and ETH nic Konstantin Taranov
@ 2025-04-14 18:28 ` Konstantin Taranov
2025-04-14 18:28 ` [PATCH rdma-next 4/4] net: mana: Add support for auxiliary device servicing events Konstantin Taranov
3 siblings, 0 replies; 8+ messages in thread
From: Konstantin Taranov @ 2025-04-14 18:28 UTC (permalink / raw)
To: kotaranov, pabeni, haiyangz, kys, edumazet, kuba, davem, decui,
wei.liu, longli, jgg, leon
Cc: linux-rdma, linux-kernel, netdev
From: Konstantin Taranov <kotaranov@microsoft.com>
Use the installed gdma_device instead of the hard-coded device
in requests to the HW.
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
---
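Note for reviewers: every hunk below is the same mechanical
substitution; the pattern, shown once:

-	req.hdr.dev_id = gc->mana_ib.dev_id;		/* hard-coded RNIC device */
+	req.hdr.dev_id = mdev->gdma_dev->dev_id;	/* gdma device backing this mana_ib */

and likewise for doorbells and queue creation:

-	doorbell = gc->mana_ib.doorbell;
+	doorbell = mdev->gdma_dev->doorbell;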
drivers/infiniband/hw/mana/cq.c | 4 +---
drivers/infiniband/hw/mana/main.c | 28 ++++++++++++++--------------
drivers/infiniband/hw/mana/qp.c | 5 ++---
3 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 0fc4e26..28e154b 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -15,14 +15,12 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
- struct gdma_context *gc;
bool is_rnic_cq;
u32 doorbell;
u32 buf_size;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- gc = mdev_to_gc(mdev);
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
cq->cq_handle = INVALID_MANA_HANDLE;
@@ -65,7 +63,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
return err;
}
- doorbell = gc->mana_ib.doorbell;
+ doorbell = mdev->gdma_dev->doorbell;
}
if (is_rnic_cq) {
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 64526b8..95ade5b 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -243,7 +243,6 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
struct mana_ib_queue *queue)
{
- struct gdma_context *gc = mdev_to_gc(mdev);
struct gdma_queue_spec spec = {};
int err;
@@ -252,7 +251,7 @@ int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_qu
spec.type = type;
spec.monitor_avl_buf = false;
spec.queue_size = size;
- err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem);
+ err = mana_gd_create_mana_wq_cq(mdev->gdma_dev, &spec, &queue->kmem);
if (err)
return err;
/* take ownership into mana_ib from mana */
@@ -737,6 +736,7 @@ int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
0x100000 / GDMA_MAX_RQE_SIZE);
caps->max_send_sge_count = 30;
caps->max_recv_sge_count = 15;
+ caps->page_size_cap = PAGE_SZ_BM;
return 0;
}
@@ -782,7 +782,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev)
spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
spec.eq.msix_index = 0;
- err = mana_gd_create_mana_eq(&gc->mana_ib, &spec, &mdev->fatal_err_eq);
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->fatal_err_eq);
if (err)
return err;
@@ -833,7 +833,7 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp));
req.hdr.req.msg_version = GDMA_MESSAGE_V2;
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.notify_eq_id = mdev->fatal_err_eq->id;
if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
@@ -858,7 +858,7 @@ int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev)
gc = mdev_to_gc(mdev);
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
@@ -885,7 +885,7 @@ int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context)
}
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.op = ADDR_OP_ADD;
req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
@@ -915,7 +915,7 @@ int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context)
}
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.op = ADDR_OP_REMOVE;
req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
@@ -938,7 +938,7 @@ int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.op = op;
copy_in_reverse(req.mac_addr, mac, ETH_ALEN);
@@ -963,7 +963,7 @@ int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 do
return -EINVAL;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.gdma_region = cq->queue.gdma_region;
req.eq_id = mdev->eqs[cq->comp_vector]->id;
@@ -995,7 +995,7 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
return 0;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.cq_handle = cq->cq_handle;
@@ -1021,7 +1021,7 @@ int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
int err, i;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.pd_handle = pd->pd_handle;
req.send_cq_handle = send_cq->cq_handle;
@@ -1057,7 +1057,7 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.rc_qp_handle = qp->qp_handle;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
@@ -1080,7 +1080,7 @@ int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
int err, i;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.pd_handle = pd->pd_handle;
req.send_cq_handle = send_cq->cq_handle;
@@ -1115,7 +1115,7 @@ int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.qp_handle = qp->qp_handle;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index c928af5..14fd7d6 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -635,7 +635,6 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
{
struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
- struct gdma_context *gc = mdev_to_gc(mdev);
u32 doorbell, queue_size;
int i, err;
@@ -654,7 +653,7 @@ static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
goto destroy_queues;
}
}
- doorbell = gc->mana_ib.doorbell;
+ doorbell = mdev->gdma_dev->doorbell;
err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
sizeof(struct ud_rq_shadow_wqe));
@@ -736,7 +735,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.qp_handle = qp->qp_handle;
req.qp_state = attr->qp_state;
--
2.43.0
* [PATCH rdma-next 4/4] net: mana: Add support for auxiliary device servicing events
2025-04-14 18:28 [PATCH rdma-next 0/4] RDMA/mana_ib: allow separate mana_ib for each mana client Konstantin Taranov
` (2 preceding siblings ...)
2025-04-14 18:28 ` [PATCH rdma-next 3/4] RDMA/mana_ib: unify mana_ib functions to support any gdma device Konstantin Taranov
@ 2025-04-14 18:28 ` Konstantin Taranov
2025-04-20 10:53 ` Leon Romanovsky
3 siblings, 1 reply; 8+ messages in thread
From: Konstantin Taranov @ 2025-04-14 18:28 UTC (permalink / raw)
To: kotaranov, pabeni, haiyangz, kys, edumazet, kuba, davem, decui,
wei.liu, longli, jgg, leon
Cc: linux-rdma, linux-kernel, netdev
From: Shiraz Saleem <shirazsaleem@microsoft.com>
Handle SoC servicing events which require the RDMA auxiliary device
resources to be cleaned up during a suspend and re-initialized during a
resume.
Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
---
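Note for reviewers: a condensed sketch of the producer side (names as
in the hunks below). The HWC event handler decodes the new EQE
(event->details[0] splits into a 4-bit type and a 28-bit value) and
calls this; the worker then runs in process context and calls
remove_adev()/add_adev():

int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event)
{
	struct mana_service_work *serv_work;

	if (gc->mana_ib.dev_id.type != GDMA_DEVICE_MANA_IB)
		return 0;	/* no RDMA device to service */

	/* GFP_ATOMIC: called from the HWC event handler */
	serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC);
	if (!serv_work)
		return -ENOMEM;

	serv_work->event = event;
	serv_work->gdma_dev = &gc->mana_ib;
	INIT_WORK(&serv_work->work, mana_handle_rdma_servicing);

	/* gc->service_wq is an ordered workqueue, so a suspend and the
	 * following resume cannot be reordered against each other.
	 */
	queue_work(gc->service_wq, &serv_work->work);
	return 0;
}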
.../net/ethernet/microsoft/mana/gdma_main.c | 11 +++-
.../net/ethernet/microsoft/mana/hw_channel.c | 19 ++++++
drivers/net/ethernet/microsoft/mana/mana_en.c | 60 +++++++++++++++++++
include/net/mana/gdma.h | 18 ++++++
include/net/mana/hw_channel.h | 9 +++
5 files changed, 116 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 1caf73c..1d98dd6 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -385,6 +385,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
case GDMA_EQE_HWC_INIT_DATA:
case GDMA_EQE_HWC_INIT_DONE:
+ case GDMA_EQE_HWC_SOC_SERVICE:
case GDMA_EQE_RNIC_QP_FATAL:
if (!eq->eq.callback)
break;
@@ -1438,9 +1439,13 @@ static int mana_gd_setup(struct pci_dev *pdev)
mana_gd_init_registers(pdev);
mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
+ gc->service_wq = alloc_ordered_workqueue("gdma_service_wq", 0);
+ if (!gc->service_wq)
+ return -ENOMEM;
+
err = mana_gd_setup_irqs(pdev);
if (err)
- return err;
+ goto free_workqueue;
err = mana_hwc_create_channel(gc);
if (err)
@@ -1464,6 +1469,8 @@ destroy_hwc:
mana_hwc_destroy_channel(gc);
remove_irq:
mana_gd_remove_irqs(pdev);
+free_workqueue:
+ destroy_workqueue(gc->service_wq);
return err;
}
@@ -1474,6 +1481,8 @@ static void mana_gd_cleanup(struct pci_dev *pdev)
mana_hwc_destroy_channel(gc);
mana_gd_remove_irqs(pdev);
+
+ destroy_workqueue(gc->service_wq);
}
static bool mana_is_pf(unsigned short dev_id)
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index a00f915..407b46e 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -112,11 +112,13 @@ out:
static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
struct gdma_event *event)
{
+ union hwc_init_soc_service_type service_data;
struct hw_channel_context *hwc = ctx;
struct gdma_dev *gd = hwc->gdma_dev;
union hwc_init_type_data type_data;
union hwc_init_eq_id_db eq_db;
u32 type, val;
+ int ret;
switch (event->type) {
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
@@ -199,7 +201,24 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
}
break;
+ case GDMA_EQE_HWC_SOC_SERVICE:
+ service_data.as_uint32 = event->details[0];
+ type = service_data.type;
+ val = service_data.value;
+ switch (type) {
+ case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
+ case GDMA_SERVICE_TYPE_RDMA_RESUME:
+ ret = mana_rdma_service_event(gd->gdma_context, type);
+ if (ret)
+ dev_err(hwc->dev, "Failed to schedule adev service event: %d\n", ret);
+ break;
+ default:
+ dev_warn(hwc->dev, "Received unknown SOC service type %u\n", type);
+ break;
+ }
+
+ break;
default:
dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type);
/* Ignore unknown events, which should never happen. */
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 70c4955..d832700 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2982,6 +2982,66 @@ idx_fail:
return ret;
}
+static void mana_handle_rdma_servicing(struct work_struct *work)
+{
+ struct mana_service_work *serv_work =
+ container_of(work, struct mana_service_work, work);
+ struct gdma_dev *gd = serv_work->gdma_dev;
+ struct device *dev = gd->gdma_context->dev;
+ int ret;
+
+ switch (serv_work->event) {
+ case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
+ if (!gd->adev || gd->is_suspended)
+ break;
+
+ remove_adev(gd);
+ gd->is_suspended = true;
+ break;
+
+ case GDMA_SERVICE_TYPE_RDMA_RESUME:
+ if (!gd->is_suspended)
+ break;
+
+ ret = add_adev(gd, "rdma");
+ if (ret)
+ dev_err(dev, "Failed to add adev on resume: %d\n", ret);
+ else
+ gd->is_suspended = false;
+ break;
+
+ default:
+ dev_warn(dev, "unknown adev service event %u\n",
+ serv_work->event);
+ break;
+ }
+
+ kfree(serv_work);
+}
+
+int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event)
+{
+ struct gdma_dev *gd = &gc->mana_ib;
+ struct mana_service_work *serv_work;
+
+ if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
+ /* RDMA device is not detected on pci */
+ return 0;
+ }
+
+ serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC);
+ if (!serv_work)
+ return -ENOMEM;
+
+ serv_work->event = event;
+ serv_work->gdma_dev = gd;
+
+ INIT_WORK(&serv_work->work, mana_handle_rdma_servicing);
+ queue_work(gc->service_wq, &serv_work->work);
+
+ return 0;
+}
+
int mana_probe(struct gdma_dev *gd, bool resuming)
{
struct gdma_context *gc = gd->gdma_context;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 89abf98..335d061 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -60,6 +60,7 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_DONE = 131,
GDMA_EQE_HWC_SOC_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
+ GDMA_EQE_HWC_SOC_SERVICE = 134,
GDMA_EQE_RNIC_QP_FATAL = 176,
};
@@ -70,6 +71,18 @@ enum {
GDMA_DEVICE_MANA_IB = 3,
};
+enum gdma_service_type {
+ GDMA_SERVICE_TYPE_NONE = 0,
+ GDMA_SERVICE_TYPE_RDMA_SUSPEND = 1,
+ GDMA_SERVICE_TYPE_RDMA_RESUME = 2,
+};
+
+struct mana_service_work {
+ struct work_struct work;
+ struct gdma_dev *gdma_dev;
+ enum gdma_service_type event;
+};
+
struct gdma_resource {
/* Protect the bitmap */
spinlock_t lock;
@@ -224,6 +237,7 @@ struct gdma_dev {
void *driver_data;
struct auxiliary_device *adev;
+ bool is_suspended;
};
/* MANA_PAGE_SIZE is the DMA unit */
@@ -409,6 +423,8 @@ struct gdma_context {
struct gdma_dev mana_ib;
u64 pf_cap_flags1;
+
+ struct workqueue_struct *service_wq;
};
#define MAX_NUM_GDMA_DEVICES 4
@@ -888,4 +904,6 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
void mana_register_debugfs(void);
void mana_unregister_debugfs(void);
+int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
+
#endif /* _GDMA_H */
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 158b125..83cf933 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -49,6 +49,15 @@ union hwc_init_type_data {
};
}; /* HW DATA */
+union hwc_init_soc_service_type {
+ u32 as_uint32;
+
+ struct {
+ u32 value : 28;
+ u32 type : 4;
+ };
+}; /* HW DATA */
+
struct hwc_rx_oob {
u32 type : 6;
u32 eom : 1;
--
2.43.0
* Re: [PATCH rdma-next 4/4] net: mana: Add support for auxiliary device servicing events
2025-04-14 18:28 ` [PATCH rdma-next 4/4] net: mana: Add support for auxiliary device servicing events Konstantin Taranov
@ 2025-04-20 10:53 ` Leon Romanovsky
2025-04-24 2:33 ` Shiraz Saleem
0 siblings, 1 reply; 8+ messages in thread
From: Leon Romanovsky @ 2025-04-20 10:53 UTC (permalink / raw)
To: Konstantin Taranov
Cc: kotaranov, pabeni, haiyangz, kys, edumazet, kuba, davem, decui,
wei.liu, longli, jgg, linux-rdma, linux-kernel, netdev
On Mon, Apr 14, 2025 at 11:28:49AM -0700, Konstantin Taranov wrote:
> From: Shiraz Saleem <shirazsaleem@microsoft.com>
>
> Handle soc servcing events which require the rdma auxiliary device resources to
> be cleaned up during a suspend, and re-initialized during a resume.
>
> Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
> Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> ---
> .../net/ethernet/microsoft/mana/gdma_main.c | 11 +++-
> .../net/ethernet/microsoft/mana/hw_channel.c | 19 ++++++
> drivers/net/ethernet/microsoft/mana/mana_en.c | 60 +++++++++++++++++++
> include/net/mana/gdma.h | 18 ++++++
> include/net/mana/hw_channel.h | 9 +++
> 5 files changed, 116 insertions(+), 1 deletion(-)
<...>
> @@ -1474,6 +1481,8 @@ static void mana_gd_cleanup(struct pci_dev *pdev)
> mana_hwc_destroy_channel(gc);
>
> mana_gd_remove_irqs(pdev);
> +
> + destroy_workqueue(gc->service_wq);
> }
<...>
> +static void mana_handle_rdma_servicing(struct work_struct *work)
> +{
> + struct mana_service_work *serv_work =
> + container_of(work, struct mana_service_work, work);
> + struct gdma_dev *gd = serv_work->gdma_dev;
> + struct device *dev = gd->gdma_context->dev;
> + int ret;
> +
> + switch (serv_work->event) {
> + case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
> + if (!gd->adev || gd->is_suspended)
> + break;
> +
> + remove_adev(gd);
> + gd->is_suspended = true;
> + break;
> +
> + case GDMA_SERVICE_TYPE_RDMA_RESUME:
> + if (!gd->is_suspended)
> + break;
> +
> + ret = add_adev(gd, "rdma");
> + if (ret)
> + dev_err(dev, "Failed to add adev on resume: %d\n", ret);
> + else
> + gd->is_suspended = false;
> + break;
> +
> + default:
> + dev_warn(dev, "unknown adev service event %u\n",
> + serv_work->event);
> + break;
> + }
> +
> + kfree(serv_work);
The series looks OK to me, except one question. Are you sure that it is
safe to have unconnected and unlocked general work while add_adev()/remove_adev()
can be called in parallel from a different thread? For example, getting a
GDMA_SERVICE_TYPE_RDMA_SUSPEND event while mana_gd_probe() fails, or some other
intervention with PCI (GDMA_SERVICE_TYPE_RDMA_SUSPEND and a PCI shutdown).
What type of protection do you have here?
Thanks
* Re: [PATCH rdma-next 4/4] net: mana: Add support for auxiliary device servicing events
2025-04-20 10:53 ` Leon Romanovsky
@ 2025-04-24 2:33 ` Shiraz Saleem
2025-04-24 10:49 ` Leon Romanovsky
0 siblings, 1 reply; 8+ messages in thread
From: Shiraz Saleem @ 2025-04-24 2:33 UTC (permalink / raw)
To: Leon Romanovsky, Konstantin Taranov
Cc: Konstantin Taranov, pabeni@redhat.com, Haiyang Zhang,
KY Srinivasan, edumazet@google.com, kuba@kernel.org,
davem@davemloft.net, Dexuan Cui, wei.liu@kernel.org, Long Li,
jgg@ziepe.ca, linux-rdma@vger.kernel.org,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org
> Subject: [EXTERNAL] Re: [PATCH rdma-next 4/4] net: mana: Add support for
> auxiliary device servicing events
>
> On Mon, Apr 14, 2025 at 11:28:49AM -0700, Konstantin Taranov wrote:
> > From: Shiraz Saleem <shirazsaleem@microsoft.com>
> >
> > Handle soc servcing events which require the rdma auxiliary device
> > resources to be cleaned up during a suspend, and re-initialized during a
> resume.
> >
> > Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
> > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > ---
> > .../net/ethernet/microsoft/mana/gdma_main.c | 11 +++-
> > .../net/ethernet/microsoft/mana/hw_channel.c | 19 ++++++
> > drivers/net/ethernet/microsoft/mana/mana_en.c | 60
> +++++++++++++++++++
> > include/net/mana/gdma.h | 18 ++++++
> > include/net/mana/hw_channel.h | 9 +++
> > 5 files changed, 116 insertions(+), 1 deletion(-)
>
> <...>
>
> > @@ -1474,6 +1481,8 @@ static void mana_gd_cleanup(struct pci_dev
> *pdev)
> > mana_hwc_destroy_channel(gc);
> >
> > mana_gd_remove_irqs(pdev);
> > +
> > + destroy_workqueue(gc->service_wq);
> > }
>
> <...>
>
> > +static void mana_handle_rdma_servicing(struct work_struct *work) {
> > + struct mana_service_work *serv_work =
> > + container_of(work, struct mana_service_work, work);
> > + struct gdma_dev *gd = serv_work->gdma_dev;
> > + struct device *dev = gd->gdma_context->dev;
> > + int ret;
> > +
> > + switch (serv_work->event) {
> > + case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
> > + if (!gd->adev || gd->is_suspended)
> > + break;
> > +
> > + remove_adev(gd);
> > + gd->is_suspended = true;
> > + break;
> > +
> > + case GDMA_SERVICE_TYPE_RDMA_RESUME:
> > + if (!gd->is_suspended)
> > + break;
> > +
> > + ret = add_adev(gd, "rdma");
> > + if (ret)
> > + dev_err(dev, "Failed to add adev on resume: %d\n",
> ret);
> > + else
> > + gd->is_suspended = false;
> > + break;
> > +
> > + default:
> > + dev_warn(dev, "unknown adev service event %u\n",
> > + serv_work->event);
> > + break;
> > + }
> > +
> > + kfree(serv_work);
>
> The series looks ok to me, except one question. Are you sure that it is safe to
> have not-connected and not-locked general work while
> add_adev/remove_adev can be called in parallel from different thread? For
> example getting event GDMA_SERVICE_TYPE_RDMA_SUSPEND while
> mana_gd_probe() fails or some other intervention with PCI
> (GDMA_SERVICE_TYPE_RDMA_SUSPEND and PCI shutdown).
>
> What type of protection do you have here?
>
Hi Leon,
Thanks for spotting this.
There are two cases.
- Probe / Resume:
add_adev() stores gd->adev only after auxiliary_device_add() succeeds.
While gd->adev is still NULL, the worker drops any GDMA_SERVICE_TYPE_RDMA_SUSPEND event, so an early suspend that arrives during probe is harmless and cannot race with the later add_adev().
- Remove / Suspend / Shutdown:
During teardown the worker may still be inside add_adev()/remove_adev() while the PCI thread starts its own remove_adev().
In v2 I'll serialize them with a flag + flush pattern.
void mana_rdma_remove(struct gdma_dev *gd)
{
[....]
WRITE_ONCE(gd->rdma_teardown, true); /* block new events */
flush_workqueue(gc->service_wq); /* wait running worker */
if (gd->adev)
remove_adev(gd);
[....]
}
i.e., during teardown we stop the producer and drain the queue,
and:
static void mana_handle_rdma_servicing(struct work_struct *work)
{
[....]
if (READ_ONCE(gd->rdma_teardown))
goto out;
[.....]
}
The flag blocks any new work, and flush_workqueue() waits for anything already running. This serialises the two paths and removes
the race you pointed out.
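Putting the pieces together, the v2 pattern would look roughly like
this (a self-contained sketch of the plan above, not the posted v2;
gd->rdma_teardown is the new flag):

void mana_rdma_remove(struct gdma_dev *gd)
{
	struct gdma_context *gc = gd->gdma_context;

	if (gd->dev_id.type != GDMA_DEVICE_MANA_IB)
		return;

	WRITE_ONCE(gd->rdma_teardown, true);	/* block new service events */
	flush_workqueue(gc->service_wq);	/* wait for a running worker */

	if (gd->adev)
		remove_adev(gd);
	mana_gd_deregister_device(gd);
}

static void mana_handle_rdma_servicing(struct work_struct *work)
{
	struct mana_service_work *serv_work =
		container_of(work, struct mana_service_work, work);
	struct gdma_dev *gd = serv_work->gdma_dev;

	if (READ_ONCE(gd->rdma_teardown))
		goto out;	/* teardown path owns the device now */

	/* suspend/resume handling as in the patch */
out:
	kfree(serv_work);
}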
Shiraz
* Re: [PATCH rdma-next 4/4] net: mana: Add support for auxiliary device servicing events
2025-04-24 2:33 ` Shiraz Saleem
@ 2025-04-24 10:49 ` Leon Romanovsky
0 siblings, 0 replies; 8+ messages in thread
From: Leon Romanovsky @ 2025-04-24 10:49 UTC (permalink / raw)
To: Shiraz Saleem
Cc: Konstantin Taranov, Konstantin Taranov, pabeni@redhat.com,
Haiyang Zhang, KY Srinivasan, edumazet@google.com,
kuba@kernel.org, davem@davemloft.net, Dexuan Cui,
wei.liu@kernel.org, Long Li, jgg@ziepe.ca,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org
On Thu, Apr 24, 2025 at 02:33:24AM +0000, Shiraz Saleem wrote:
> > Subject: [EXTERNAL] Re: [PATCH rdma-next 4/4] net: mana: Add support for
> > auxiliary device servicing events
> >
> > On Mon, Apr 14, 2025 at 11:28:49AM -0700, Konstantin Taranov wrote:
> > > From: Shiraz Saleem <shirazsaleem@microsoft.com>
> > >
> > > Handle soc servcing events which require the rdma auxiliary device
> > > resources to be cleaned up during a suspend, and re-initialized during a
> > resume.
> > >
> > > Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
> > > Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
> > > ---
> > > .../net/ethernet/microsoft/mana/gdma_main.c | 11 +++-
> > > .../net/ethernet/microsoft/mana/hw_channel.c | 19 ++++++
> > > drivers/net/ethernet/microsoft/mana/mana_en.c | 60
> > +++++++++++++++++++
> > > include/net/mana/gdma.h | 18 ++++++
> > > include/net/mana/hw_channel.h | 9 +++
> > > 5 files changed, 116 insertions(+), 1 deletion(-)
> >
> > <...>
> >
> > > @@ -1474,6 +1481,8 @@ static void mana_gd_cleanup(struct pci_dev
> > *pdev)
> > > mana_hwc_destroy_channel(gc);
> > >
> > > mana_gd_remove_irqs(pdev);
> > > +
> > > + destroy_workqueue(gc->service_wq);
> > > }
> >
> > <...>
> >
> > > +static void mana_handle_rdma_servicing(struct work_struct *work) {
> > > + struct mana_service_work *serv_work =
> > > + container_of(work, struct mana_service_work, work);
> > > + struct gdma_dev *gd = serv_work->gdma_dev;
> > > + struct device *dev = gd->gdma_context->dev;
> > > + int ret;
> > > +
> > > + switch (serv_work->event) {
> > > + case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
> > > + if (!gd->adev || gd->is_suspended)
> > > + break;
> > > +
> > > + remove_adev(gd);
> > > + gd->is_suspended = true;
> > > + break;
> > > +
> > > + case GDMA_SERVICE_TYPE_RDMA_RESUME:
> > > + if (!gd->is_suspended)
> > > + break;
> > > +
> > > + ret = add_adev(gd, "rdma");
> > > + if (ret)
> > > + dev_err(dev, "Failed to add adev on resume: %d\n",
> > ret);
> > > + else
> > > + gd->is_suspended = false;
> > > + break;
> > > +
> > > + default:
> > > + dev_warn(dev, "unknown adev service event %u\n",
> > > + serv_work->event);
> > > + break;
> > > + }
> > > +
> > > + kfree(serv_work);
> >
> > The series looks ok to me, except one question. Are you sure that it is safe to
> > have not-connected and not-locked general work while
> > add_adev/remove_adev can be called in parallel from different thread? For
> > example getting event GDMA_SERVICE_TYPE_RDMA_SUSPEND while
> > mana_gd_probe() fails or some other intervention with PCI
> > (GDMA_SERVICE_TYPE_RDMA_SUSPEND and PCI shutdown).
> >
> > What type of protection do you have here?
> >
> Hi Leon,
>
> Thanks for spotting this.
>
> There are two cases.
>
> -Probe / Resume
> add_adev() stores gd->adev only after auxiliary_device_add() succeeds.
> While gd->adev is still NULL the worker drops any GDMA_SERVICE_TYPE_RDMA_SUSPEND event, so an early suspend that arrives during probe is harmless and cannot race with the later add_adev().
>
> -Remove / Suspend / Shutdown
> During teardown the worker may still be inside add_adev()/remove_adev() while the PCI thread starts its own remove_adev().
>
> In v2 I ll serialize them with flag + flush pattern.
>
> void mana_rdma_remove(struct gdma_dev *gd)
> {
> [....]
> WRITE_ONCE(gd->rdma_teardown, true); /* block new events */
> flush_workqueue(gc->service_wq); /* wait running worker */
>
> if (gd->adev)
> remove_adev(gd);
>
> [....]
> }
> i.e. during teardown, we stop the producer and drain the queue
>
> and,
>
> static void mana_handle_rdma_servicing(struct work_struct *work)
> {
> [....]
> if (READ_ONCE(gd->rdma_teardown))
> goto out;
> [.....]
> }
> The flag blocks any new work, and flush_workqueue() waits for anything already running. This serialises the two paths and removes
> the race you pointed out.
Yes, I also think that solves it, so let's post v2, please.
Just remember that WRITE_ONCE/READ_ONCE is not a replacement for locks.
Thanks
>
> Shiraz
>