* [PATCH net 0/2] net: make memory provider install / close paths more common
@ 2025-03-31 19:42 Jakub Kicinski
2025-03-31 19:43 ` [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq() Jakub Kicinski
` (2 more replies)
0 siblings, 3 replies; 13+ messages in thread
From: Jakub Kicinski @ 2025-03-31 19:42 UTC (permalink / raw)
To: davem
Cc: netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, almasrymina, dw, sdf, Jakub Kicinski
We seem to be fixing bugs in the config path for devmem which also exist
in the io_uring ZC path. Let's try to make the two paths more common,
otherwise this is bound to keep happening.
Found by code inspection and compile tested only.
Jakub Kicinski (2):
net: move mp dev config validation to __net_mp_open_rxq()
net: avoid false positive warnings in __net_mp_close_rxq()
include/net/page_pool/memory_provider.h | 6 +++
net/core/devmem.c | 64 ++++++------------------
net/core/netdev-genl.c | 6 ---
net/core/netdev_rx_queue.c | 66 ++++++++++++++++++-------
4 files changed, 69 insertions(+), 73 deletions(-)
--
2.49.0
* [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq()
2025-03-31 19:42 [PATCH net 0/2] net: make memory provider install / close paths more common Jakub Kicinski
@ 2025-03-31 19:43 ` Jakub Kicinski
2025-03-31 20:39 ` Stanislav Fomichev
2025-04-01 11:37 ` Pavel Begunkov
2025-03-31 19:43 ` [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq() Jakub Kicinski
2025-04-02 15:35 ` [PATCH net 0/2] net: make memory provider install / close paths more common Taehee Yoo
2 siblings, 2 replies; 13+ messages in thread
From: Jakub Kicinski @ 2025-03-31 19:43 UTC (permalink / raw)
To: davem
Cc: netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, almasrymina, dw, sdf, Jakub Kicinski
devmem code performs a number of safety checks to avoid having
to reimplement all of them in the drivers. Move those to
__net_mp_open_rxq() and reuse that function for binding to make
sure that io_uring ZC also benefits from them.
While at it, rename the queue ID variable to rxq_idx in
__net_mp_open_rxq(), since we touch most of the relevant lines anyway.
Fixes: 6e18ed929d3b ("net: add helpers for setting a memory provider on an rx queue")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
CC: ap420073@gmail.com
CC: almasrymina@google.com
CC: asml.silence@gmail.com
CC: dw@davidwei.uk
CC: sdf@fomichev.me
---
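A minimal sketch, not part of this patch, of how a memory provider user
would go through the shared helpers after this change; "example_mp_ops"
and "example_state" are hypothetical stand-ins for a real provider:

/* Hypothetical caller; assumes the netdev instance lock is already held
 * (callers that don't hold it would use net_mp_open_rxq() /
 * net_mp_close_rxq() instead, which take the lock themselves).
 */
static int example_mp_bind(struct net_device *dev, unsigned int rxq_idx,
                           struct netlink_ext_ack *extack)
{
        struct pp_memory_provider_params p = {
                .mp_ops  = &example_mp_ops,   /* hypothetical ops table */
                .mp_priv = &example_state,    /* hypothetical provider state */
        };
        int err;

        /* queue range, tcp-data-split, hds-thresh, XDP and AF_XDP checks
         * all happen inside the helper, errors are reported via extack
         */
        err = __net_mp_open_rxq(dev, rxq_idx, &p, extack);
        if (err)
                return err;

        /* ... run traffic on the queue ... */

        __net_mp_close_rxq(dev, rxq_idx, &p);
        return 0;
}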
include/net/page_pool/memory_provider.h | 6 +++
net/core/devmem.c | 52 ++++++-------------------
net/core/netdev-genl.c | 6 ---
net/core/netdev_rx_queue.c | 49 +++++++++++++++++------
4 files changed, 55 insertions(+), 58 deletions(-)
diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
index b3e665897767..3724ac3c9fb2 100644
--- a/include/net/page_pool/memory_provider.h
+++ b/include/net/page_pool/memory_provider.h
@@ -6,6 +6,7 @@
#include <net/page_pool/types.h>
struct netdev_rx_queue;
+struct netlink_ext_ack;
struct sk_buff;
struct memory_provider_ops {
@@ -24,8 +25,13 @@ void net_mp_niov_clear_page_pool(struct net_iov *niov);
int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *p);
+int __net_mp_open_rxq(struct net_device *dev, unsigned int ifq_idx,
+ const struct pp_memory_provider_params *p,
+ struct netlink_ext_ack *extack);
void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *old_p);
+void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+ const struct pp_memory_provider_params *old_p);
/**
* net_mp_netmem_place_in_cache() - give a netmem to a page pool
diff --git a/net/core/devmem.c b/net/core/devmem.c
index ee145a2aa41c..f2ce3c2ebc97 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -8,7 +8,6 @@
*/
#include <linux/dma-buf.h>
-#include <linux/ethtool_netlink.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
@@ -143,57 +142,28 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
struct net_devmem_dmabuf_binding *binding,
struct netlink_ext_ack *extack)
{
+ struct pp_memory_provider_params mp_params = {
+ .mp_priv = binding,
+ .mp_ops = &dmabuf_devmem_ops,
+ };
struct netdev_rx_queue *rxq;
u32 xa_idx;
int err;
- if (rxq_idx >= dev->real_num_rx_queues) {
- NL_SET_ERR_MSG(extack, "rx queue index out of range");
- return -ERANGE;
- }
-
- if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
- NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
- return -EINVAL;
- }
-
- if (dev->cfg->hds_thresh) {
- NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
- return -EINVAL;
- }
-
- rxq = __netif_get_rx_queue(dev, rxq_idx);
- if (rxq->mp_params.mp_ops) {
- NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
- return -EEXIST;
- }
-
-#ifdef CONFIG_XDP_SOCKETS
- if (rxq->pool) {
- NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
- return -EBUSY;
- }
-#endif
-
- err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
- GFP_KERNEL);
+ err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
if (err)
return err;
- rxq->mp_params.mp_priv = binding;
- rxq->mp_params.mp_ops = &dmabuf_devmem_ops;
-
- err = netdev_rx_queue_restart(dev, rxq_idx);
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
+ err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
+ GFP_KERNEL);
if (err)
- goto err_xa_erase;
+ goto err_close_rxq;
return 0;
-err_xa_erase:
- rxq->mp_params.mp_priv = NULL;
- rxq->mp_params.mp_ops = NULL;
- xa_erase(&binding->bound_rxqs, xa_idx);
-
+err_close_rxq:
+ __net_mp_close_rxq(dev, rxq_idx, &mp_params);
return err;
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index fd1cfa9707dc..8ad4a944f368 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -874,12 +874,6 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
goto err_unlock;
}
- if (dev_xdp_prog_count(netdev)) {
- NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached");
- err = -EEXIST;
- goto err_unlock;
- }
-
binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack);
if (IS_ERR(binding)) {
err = PTR_ERR(binding);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 3af716f77a13..556b5393ec9f 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <net/netdev_lock.h>
#include <net/netdev_queues.h>
@@ -86,8 +87,9 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
-static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *p)
+int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *p,
+ struct netlink_ext_ack *extack)
{
struct netdev_rx_queue *rxq;
int ret;
@@ -95,16 +97,41 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
- if (ifq_idx >= dev->real_num_rx_queues)
+ if (rxq_idx >= dev->real_num_rx_queues)
return -EINVAL;
- ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);
+ rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
- rxq = __netif_get_rx_queue(dev, ifq_idx);
- if (rxq->mp_params.mp_ops)
+ if (rxq_idx >= dev->real_num_rx_queues) {
+ NL_SET_ERR_MSG(extack, "rx queue index out of range");
+ return -ERANGE;
+ }
+ if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
+ NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
+ return -EINVAL;
+ }
+ if (dev->cfg->hds_thresh) {
+ NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
+ return -EINVAL;
+ }
+ if (dev_xdp_prog_count(dev)) {
+ NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached");
return -EEXIST;
+ }
+
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
+ if (rxq->mp_params.mp_ops) {
+ NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
+ return -EEXIST;
+ }
+#ifdef CONFIG_XDP_SOCKETS
+ if (rxq->pool) {
+ NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
+ return -EBUSY;
+ }
+#endif
rxq->mp_params = *p;
- ret = netdev_rx_queue_restart(dev, ifq_idx);
+ ret = netdev_rx_queue_restart(dev, rxq_idx);
if (ret) {
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
@@ -112,19 +139,19 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
return ret;
}
-int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
struct pp_memory_provider_params *p)
{
int ret;
netdev_lock(dev);
- ret = __net_mp_open_rxq(dev, ifq_idx, p);
+ ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL);
netdev_unlock(dev);
return ret;
}
-static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *old_p)
+void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+ const struct pp_memory_provider_params *old_p)
{
struct netdev_rx_queue *rxq;
--
2.49.0
* [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq()
2025-03-31 19:42 [PATCH net 0/2] net: make memory provider install / close paths more common Jakub Kicinski
2025-03-31 19:43 ` [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq() Jakub Kicinski
@ 2025-03-31 19:43 ` Jakub Kicinski
2025-03-31 20:41 ` Stanislav Fomichev
2025-04-02 18:52 ` Mina Almasry
2025-04-02 15:35 ` [PATCH net 0/2] net: make memory provider install / close paths more common Taehee Yoo
2 siblings, 2 replies; 13+ messages in thread
From: Jakub Kicinski @ 2025-03-31 19:43 UTC (permalink / raw)
To: davem
Cc: netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, almasrymina, dw, sdf, Jakub Kicinski
Commit under Fixes solved the problem of spurious warnings when we
uninstall an MP from a device while it's down. The __net_mp_close_rxq()
which is used by io_uring was not fixed. Move the fix over and reuse
__net_mp_close_rxq() in the devmem path.
Fixes: a70f891e0fa0 ("net: devmem: do not WARN conditionally after netdev_rx_queue_restart()")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
net/core/devmem.c | 12 +++++-------
net/core/netdev_rx_queue.c | 17 +++++++++--------
2 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/net/core/devmem.c b/net/core/devmem.c
index f2ce3c2ebc97..6e27a47d0493 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -116,21 +116,19 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
struct netdev_rx_queue *rxq;
unsigned long xa_idx;
unsigned int rxq_idx;
- int err;
if (binding->list.next)
list_del(&binding->list);
xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
- WARN_ON(rxq->mp_params.mp_priv != binding);
-
- rxq->mp_params.mp_priv = NULL;
- rxq->mp_params.mp_ops = NULL;
+ const struct pp_memory_provider_params mp_params = {
+ .mp_priv = binding,
+ .mp_ops = &dmabuf_devmem_ops,
+ };
rxq_idx = get_netdev_rx_queue_index(rxq);
- err = netdev_rx_queue_restart(binding->dev, rxq_idx);
- WARN_ON(err && err != -ENETDOWN);
+ __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
}
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 556b5393ec9f..3e906c2950bd 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -154,17 +154,13 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
const struct pp_memory_provider_params *old_p)
{
struct netdev_rx_queue *rxq;
+ int err;
if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
return;
rxq = __netif_get_rx_queue(dev, ifq_idx);
-
- /* Callers holding a netdev ref may get here after we already
- * went thru shutdown via dev_memory_provider_uninstall().
- */
- if (dev->reg_state > NETREG_REGISTERED &&
- !rxq->mp_params.mp_ops)
+ if (!rxq->mp_params.mp_ops)
return;
if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
@@ -173,13 +169,18 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
- WARN_ON(netdev_rx_queue_restart(dev, ifq_idx));
+ err = netdev_rx_queue_restart(dev, ifq_idx);
+ WARN_ON(err && err != -ENETDOWN);
}
void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
struct pp_memory_provider_params *old_p)
{
netdev_lock(dev);
- __net_mp_close_rxq(dev, ifq_idx, old_p);
+ /* Callers holding a netdev ref may get here after we already
+ * went thru shutdown via dev_memory_provider_uninstall().
+ */
+ if (dev->reg_state <= NETREG_REGISTERED)
+ __net_mp_close_rxq(dev, ifq_idx, old_p);
netdev_unlock(dev);
}
--
2.49.0
* Re: [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq()
2025-03-31 19:43 ` [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq() Jakub Kicinski
@ 2025-03-31 20:39 ` Stanislav Fomichev
2025-04-01 11:37 ` Pavel Begunkov
1 sibling, 0 replies; 13+ messages in thread
From: Stanislav Fomichev @ 2025-03-31 20:39 UTC (permalink / raw)
To: Jakub Kicinski
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, almasrymina, dw, sdf
On 03/31, Jakub Kicinski wrote:
> devmem code performs a number of safety checks to avoid having
> to reimplement all of them in the drivers. Move those to
> __net_mp_open_rxq() and reuse that function for binding to make
> sure that io_uring ZC also benefits from them.
>
> While at it, rename the queue ID variable to rxq_idx in
> __net_mp_open_rxq(), since we touch most of the relevant lines anyway.
>
> Fixes: 6e18ed929d3b ("net: add helpers for setting a memory provider on an rx queue")
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> CC: ap420073@gmail.com
> CC: almasrymina@google.com
> CC: asml.silence@gmail.com
> CC: dw@davidwei.uk
> CC: sdf@fomichev.me
> ---
> include/net/page_pool/memory_provider.h | 6 +++
> net/core/devmem.c | 52 ++++++-------------------
> net/core/netdev-genl.c | 6 ---
> net/core/netdev_rx_queue.c | 49 +++++++++++++++++------
> 4 files changed, 55 insertions(+), 58 deletions(-)
>
> diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
> index b3e665897767..3724ac3c9fb2 100644
> --- a/include/net/page_pool/memory_provider.h
> +++ b/include/net/page_pool/memory_provider.h
> @@ -6,6 +6,7 @@
> #include <net/page_pool/types.h>
>
> struct netdev_rx_queue;
> +struct netlink_ext_ack;
> struct sk_buff;
>
> struct memory_provider_ops {
> @@ -24,8 +25,13 @@ void net_mp_niov_clear_page_pool(struct net_iov *niov);
>
> int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
> struct pp_memory_provider_params *p);
> +int __net_mp_open_rxq(struct net_device *dev, unsigned int ifq_idx,
> + const struct pp_memory_provider_params *p,
> + struct netlink_ext_ack *extack);
Nit: you keep using ifq_idx here, but in the .c file you rename it to
rxq_idx. Not worth the respin..
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
* Re: [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq()
2025-03-31 19:43 ` [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq() Jakub Kicinski
@ 2025-03-31 20:41 ` Stanislav Fomichev
2025-04-02 18:52 ` Mina Almasry
1 sibling, 0 replies; 13+ messages in thread
From: Stanislav Fomichev @ 2025-03-31 20:41 UTC (permalink / raw)
To: Jakub Kicinski
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, almasrymina, dw, sdf
On 03/31, Jakub Kicinski wrote:
> Commit under Fixes solved the problem of spurious warnings when we
> uninstall an MP from a device while it's down. The __net_mp_close_rxq()
> which is used by io_uring was not fixed. Move the fix over and reuse
> __net_mp_close_rxq() in the devmem path.
>
> Fixes: a70f891e0fa0 ("net: devmem: do not WARN conditionally after netdev_rx_queue_restart()")
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
* Re: [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq()
2025-03-31 19:43 ` [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq() Jakub Kicinski
2025-03-31 20:39 ` Stanislav Fomichev
@ 2025-04-01 11:37 ` Pavel Begunkov
2025-04-01 15:00 ` Jakub Kicinski
1 sibling, 1 reply; 13+ messages in thread
From: Pavel Begunkov @ 2025-04-01 11:37 UTC (permalink / raw)
To: Jakub Kicinski, davem
Cc: netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
almasrymina, dw, sdf
On 3/31/25 20:43, Jakub Kicinski wrote:
> devmem code performs a number of safety checks to avoid having
> to reimplement all of them in the drivers. Move those to
> __net_mp_open_rxq() and reuse that function for binding to make
> sure that io_uring ZC also benefits from them.
>
> While at it, rename the queue ID variable to rxq_idx in
> __net_mp_open_rxq(), since we touch most of the relevant lines anyway.
Looks good, one question below
...
> diff --git a/net/core/devmem.c b/net/core/devmem.c
> index ee145a2aa41c..f2ce3c2ebc97 100644
> --- a/net/core/devmem.c
> +++ b/net/core/devmem.c
> @@ -8,7 +8,6 @@
...
> -
> - err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
> - GFP_KERNEL);
> + err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
> if (err)
> return err;
Was reversing the order b/w open and xa_alloc intentional?
It didn't need __net_mp_close_rxq() before, which is a good thing
considering the error handling in __net_mp_close_rxq is a bit
flaky (i.e. the WARN_ON at the end).
>
> - rxq->mp_params.mp_priv = binding;
> - rxq->mp_params.mp_ops = &dmabuf_devmem_ops;
> -
> - err = netdev_rx_queue_restart(dev, rxq_idx);
> + rxq = __netif_get_rx_queue(dev, rxq_idx);
> + err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
> + GFP_KERNEL);
> if (err)
> - goto err_xa_erase;
> + goto err_close_rxq;
>
> return 0;
>
> -err_xa_erase:
> - rxq->mp_params.mp_priv = NULL;
> - rxq->mp_params.mp_ops = NULL;
> - xa_erase(&binding->bound_rxqs, xa_idx);
> -
> +err_close_rxq:
> + __net_mp_close_rxq(dev, rxq_idx, &mp_params);
> return err;
> }
--
Pavel Begunkov
* Re: [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq()
2025-04-01 11:37 ` Pavel Begunkov
@ 2025-04-01 15:00 ` Jakub Kicinski
2025-04-02 12:22 ` Pavel Begunkov
2025-04-02 18:46 ` Mina Almasry
0 siblings, 2 replies; 13+ messages in thread
From: Jakub Kicinski @ 2025-04-01 15:00 UTC (permalink / raw)
To: Pavel Begunkov
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
almasrymina, dw, sdf
On Tue, 1 Apr 2025 12:37:34 +0100 Pavel Begunkov wrote:
> > - err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
> > - GFP_KERNEL);
> > + err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
> > if (err)
> > return err;
>
> Was reversing the order b/w open and xa_alloc intentional?
> It didn't need __net_mp_close_rxq() before, which is a good thing
> considering the error handling in __net_mp_close_rxq is a bit
> flaky (i.e. the WARN_ON at the end).
Should have mentioned... yes, intentional, otherwise we'd either have to
insert a potentially invalid rxq pointer into the xarray or duplicate
the rxq bounds check. Inserting an invalid pointer and deleting it immediately
should be okay, since readers take the instance lock, but it felt a little
dirty. In practice xa_alloc() failures should be extremely rare here, so
I went with the reorder.
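Condensed, with the above folded into comments: a simplified sketch of
the resulting order in net_devmem_bind_dmabuf_to_queue() (not the
verbatim hunk):

        /* validates rxq_idx, hds/XDP/AF_XDP state, then restarts the queue */
        err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
        if (err)
                return err;

        /* rxq_idx is known to be in range here, no second bounds check */
        rxq = __netif_get_rx_queue(dev, rxq_idx);
        err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
                       GFP_KERNEL);
        if (err)
                /* rare: only an xa_alloc() failure unwinds via close */
                __net_mp_close_rxq(dev, rxq_idx, &mp_params);

        return err;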
* Re: [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq()
2025-04-01 15:00 ` Jakub Kicinski
@ 2025-04-02 12:22 ` Pavel Begunkov
2025-04-02 18:46 ` Mina Almasry
1 sibling, 0 replies; 13+ messages in thread
From: Pavel Begunkov @ 2025-04-02 12:22 UTC (permalink / raw)
To: Jakub Kicinski
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
almasrymina, dw, sdf
On 4/1/25 16:00, Jakub Kicinski wrote:
> On Tue, 1 Apr 2025 12:37:34 +0100 Pavel Begunkov wrote:
>>> - err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
>>> - GFP_KERNEL);
>>> + err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
>>> if (err)
>>> return err;
>>
>> Was reversing the order b/w open and xa_alloc intentional?
>> It didn't need __net_mp_close_rxq() before, which is a good thing
>> considering the error handling in __net_mp_close_rxq is a bit
>> flaky (i.e. the WARN_ON at the end).
>
> Should have mentioned... yes, intentional, otherwise we'd either have to
> insert a potentially invalid rxq pointer into the xarray or duplicate
> the rxq bounds check. Inserting an invalid pointer and deleting it immediately
> should be okay, since readers take the instance lock, but it felt a little
> dirty. In practice xa_alloc() failures should be extremely rare here, so
> I went with the reorder.
I see, sgtm
--
Pavel Begunkov
* Re: [PATCH net 0/2] net: make memory provider install / close paths more common
2025-03-31 19:42 [PATCH net 0/2] net: make memory provider install / close paths more common Jakub Kicinski
2025-03-31 19:43 ` [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq() Jakub Kicinski
2025-03-31 19:43 ` [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq() Jakub Kicinski
@ 2025-04-02 15:35 ` Taehee Yoo
2 siblings, 0 replies; 13+ messages in thread
From: Taehee Yoo @ 2025-04-02 15:35 UTC (permalink / raw)
To: Jakub Kicinski
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms,
asml.silence, almasrymina, dw, sdf
On Tue, Apr 1, 2025 at 4:42 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
Hi Jakub,
Thanks a lot for this fix!
> We seem to be fixing bugs in the config path for devmem which also exist
> in the io_uring ZC path. Let's try to make the two paths more common,
> otherwise this is bound to keep happening.
>
> Found by code inspection and compile tested only.
I tested this patchset with my RFC[1].
This works well, and I don't see any splats from kmemleak, lockdep, etc.
[1]https://lore.kernel.org/netdev/20250331114729.594603-1-ap420073@gmail.com/
Thanks a lot!
Taehee Yoo
>
> Jakub Kicinski (2):
> net: move mp dev config validation to __net_mp_open_rxq()
> net: avoid false positive warnings in __net_mp_close_rxq()
>
> include/net/page_pool/memory_provider.h | 6 +++
> net/core/devmem.c | 64 ++++++------------------
> net/core/netdev-genl.c | 6 ---
> net/core/netdev_rx_queue.c | 66 ++++++++++++++++++-------
> 4 files changed, 69 insertions(+), 73 deletions(-)
>
> --
> 2.49.0
>
* Re: [PATCH net 1/2] net: move mp dev config validation to __net_mp_open_rxq()
2025-04-01 15:00 ` Jakub Kicinski
2025-04-02 12:22 ` Pavel Begunkov
@ 2025-04-02 18:46 ` Mina Almasry
1 sibling, 0 replies; 13+ messages in thread
From: Mina Almasry @ 2025-04-02 18:46 UTC (permalink / raw)
To: Jakub Kicinski
Cc: Pavel Begunkov, davem, netdev, edumazet, pabeni, andrew+netdev,
horms, ap420073, dw, sdf
On Tue, Apr 1, 2025 at 8:00 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 1 Apr 2025 12:37:34 +0100 Pavel Begunkov wrote:
> > > - err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
> > > - GFP_KERNEL);
> > > + err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
> > > if (err)
> > > return err;
> >
> > Was reversing the order b/w open and xa_alloc intentional?
> > It didn't need __net_mp_close_rxq() before, which is a good thing
> > considering the error handling in __net_mp_close_rxq is a bit
> > flaky (i.e. the WARN_ON at the end).
>
> Should have mentioned... yes, intentional, otherwise we'd either have to
> insert a potentially invalid rxq pointer into the xarray or duplicate
> the rxq bounds check. Inserting an invalid pointer and deleting it immediately
> should be okay, since readers take the instance lock, but it felt a little
> dirty. In practice xa_alloc() failures should be extremely rare here, so
> I went with the reorder.
FWIW, yes, I imagine accessors of binding->bound_rxqs should all have
binding->dev locked at that point (and held rtnl before the locking
changes). I preferred the other order just because restarting the
queue seemed like a heavy operation with all the ndos it calls, even
if it doesn't hit the WARN_ON.
But the error path should be rare anyway, and I think that works too.
I could not find other issues in the patch.
Reviewed-by: Mina Almasry <almasrymina@google.com>
--
Thanks,
Mina
* Re: [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq()
2025-03-31 19:43 ` [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq() Jakub Kicinski
2025-03-31 20:41 ` Stanislav Fomichev
@ 2025-04-02 18:52 ` Mina Almasry
2025-04-02 23:24 ` Jakub Kicinski
1 sibling, 1 reply; 13+ messages in thread
From: Mina Almasry @ 2025-04-02 18:52 UTC (permalink / raw)
To: Jakub Kicinski
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, dw, sdf
On Mon, Mar 31, 2025 at 12:43 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Commit under Fixes solved the problem of spurious warnings when we
> uninstall an MP from a device while it's down. The __net_mp_close_rxq()
> which is used by io_uring was not fixed. Move the fix over and reuse
> __net_mp_close_rxq() in the devmem path.
>
> Fixes: a70f891e0fa0 ("net: devmem: do not WARN conditionally after netdev_rx_queue_restart()")
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> net/core/devmem.c | 12 +++++-------
> net/core/netdev_rx_queue.c | 17 +++++++++--------
> 2 files changed, 14 insertions(+), 15 deletions(-)
>
> diff --git a/net/core/devmem.c b/net/core/devmem.c
> index f2ce3c2ebc97..6e27a47d0493 100644
> --- a/net/core/devmem.c
> +++ b/net/core/devmem.c
> @@ -116,21 +116,19 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
> struct netdev_rx_queue *rxq;
> unsigned long xa_idx;
> unsigned int rxq_idx;
> - int err;
>
> if (binding->list.next)
> list_del(&binding->list);
>
> xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
> - WARN_ON(rxq->mp_params.mp_priv != binding);
> -
> - rxq->mp_params.mp_priv = NULL;
> - rxq->mp_params.mp_ops = NULL;
> + const struct pp_memory_provider_params mp_params = {
> + .mp_priv = binding,
> + .mp_ops = &dmabuf_devmem_ops,
> + };
>
> rxq_idx = get_netdev_rx_queue_index(rxq);
>
> - err = netdev_rx_queue_restart(binding->dev, rxq_idx);
> - WARN_ON(err && err != -ENETDOWN);
> + __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
> }
>
> xa_erase(&net_devmem_dmabuf_bindings, binding->id);
> diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
> index 556b5393ec9f..3e906c2950bd 100644
> --- a/net/core/netdev_rx_queue.c
> +++ b/net/core/netdev_rx_queue.c
> @@ -154,17 +154,13 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
> const struct pp_memory_provider_params *old_p)
> {
> struct netdev_rx_queue *rxq;
> + int err;
>
> if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
> return;
>
> rxq = __netif_get_rx_queue(dev, ifq_idx);
> -
> - /* Callers holding a netdev ref may get here after we already
> - * went thru shutdown via dev_memory_provider_uninstall().
> - */
> - if (dev->reg_state > NETREG_REGISTERED &&
> - !rxq->mp_params.mp_ops)
> + if (!rxq->mp_params.mp_ops)
> return;
>
> if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
> @@ -173,13 +169,18 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
>
> rxq->mp_params.mp_ops = NULL;
> rxq->mp_params.mp_priv = NULL;
> - WARN_ON(netdev_rx_queue_restart(dev, ifq_idx));
> + err = netdev_rx_queue_restart(dev, ifq_idx);
> + WARN_ON(err && err != -ENETDOWN);
> }
>
> void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
> struct pp_memory_provider_params *old_p)
> {
> netdev_lock(dev);
> - __net_mp_close_rxq(dev, ifq_idx, old_p);
> + /* Callers holding a netdev ref may get here after we already
> + * went thru shutdown via dev_memory_provider_uninstall().
> + */
> + if (dev->reg_state <= NETREG_REGISTERED)
> + __net_mp_close_rxq(dev, ifq_idx, old_p);
Not obvious to me why this check was moved. Do you expect to call
__net_mp_close_rxq on an unregistered netdev and expect it to succeed
in io_uring binding or something?
--
Thanks,
Mina
* Re: [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq()
2025-04-02 18:52 ` Mina Almasry
@ 2025-04-02 23:24 ` Jakub Kicinski
2025-04-03 1:27 ` Jakub Kicinski
0 siblings, 1 reply; 13+ messages in thread
From: Jakub Kicinski @ 2025-04-02 23:24 UTC (permalink / raw)
To: Mina Almasry, sdf
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, dw
On Wed, 2 Apr 2025 11:52:50 -0700 Mina Almasry wrote:
> > netdev_lock(dev);
> > - __net_mp_close_rxq(dev, ifq_idx, old_p);
> > + /* Callers holding a netdev ref may get here after we already
> > + * went thru shutdown via dev_memory_provider_uninstall().
> > + */
> > + if (dev->reg_state <= NETREG_REGISTERED)
> > + __net_mp_close_rxq(dev, ifq_idx, old_p);
>
> Not obvious to me why this check was moved. Do you expect to call
> __net_mp_close_rxq on an unregistered netdev and expect it to succeed
> in io_uring binding or something?
Yes, io_uring state is under a spin lock, so it can't call in here atomically.
Device unregister may race with io_uring shutdown.
Now that I look at it I think we need
diff --git a/net/core/dev.c b/net/core/dev.c
index be17e0660144..0a70080a1209 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11947,6 +11947,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
unlist_netdevice(dev);
netdev_lock(dev);
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
+ dev_memory_provider_uninstall(dev);
netdev_unlock(dev);
}
flush_all_backlogs();
@@ -11961,7 +11962,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
netdev_lock_ops(dev);
dev_xdp_uninstall(dev);
- dev_memory_provider_uninstall(dev);
netdev_unlock_ops(dev);
bpf_dev_bound_netdev_unregister(dev);
Since 1d22d3060b9b ("net: drop rtnl_lock for queue_mgmt operations")
we drop the lock after setting UNREGISTERING, so we may call .uninstall
after io_uring has already torn down its side.
Right, Stan?
* Re: [PATCH net 2/2] net: avoid false positive warnings in __net_mp_close_rxq()
2025-04-02 23:24 ` Jakub Kicinski
@ 2025-04-03 1:27 ` Jakub Kicinski
0 siblings, 0 replies; 13+ messages in thread
From: Jakub Kicinski @ 2025-04-03 1:27 UTC (permalink / raw)
To: Mina Almasry, sdf
Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, ap420073,
asml.silence, dw
On Wed, 2 Apr 2025 16:24:28 -0700 Jakub Kicinski wrote:
> On Wed, 2 Apr 2025 11:52:50 -0700 Mina Almasry wrote:
> > > netdev_lock(dev);
> > > - __net_mp_close_rxq(dev, ifq_idx, old_p);
> > > + /* Callers holding a netdev ref may get here after we already
> > > + * went thru shutdown via dev_memory_provider_uninstall().
> > > + */
> > > + if (dev->reg_state <= NETREG_REGISTERED)
> > > + __net_mp_close_rxq(dev, ifq_idx, old_p);
> >
> > Not obvious to me why this check was moved. Do you expect to call
> > __net_mp_close_rxq on an unregistered netdev and expect it to succeed
> > in io_uring binding or something?
>
> Yes, io_uring state is under a spin lock, so it can't call in here atomically.
> Device unregister may race with io_uring shutdown.
>
> Now that I look at it I think we need
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index be17e0660144..0a70080a1209 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -11947,6 +11947,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
> unlist_netdevice(dev);
> netdev_lock(dev);
> WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
> + dev_memory_provider_uninstall(dev);
> netdev_unlock(dev);
> }
> flush_all_backlogs();
> @@ -11961,7 +11962,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
> dev_tcx_uninstall(dev);
> netdev_lock_ops(dev);
> dev_xdp_uninstall(dev);
> - dev_memory_provider_uninstall(dev);
> netdev_unlock_ops(dev);
> bpf_dev_bound_netdev_unregister(dev);
>
> Since 1d22d3060b9b ("net: drop rtnl_lock for queue_mgmt operations")
> we drop the lock after setting UNREGISTERING, so we may call .uninstall
> after io_uring has already torn down its side.
>
> Right, Stan?
Actually, if I don't split the check here, things will just work.
Let me do that in v2. Thanks for flagging!
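For reference, a rough guess at what the unsplit version could look like,
i.e. the reg_state check staying inside __net_mp_close_rxq() itself so
that both the devmem unbind path and io_uring teardown get it. This is
speculation about a possible v2, not posted code:

void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
                        const struct pp_memory_provider_params *old_p)
{
        struct netdev_rx_queue *rxq;
        int err;

        if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
                return;

        rxq = __netif_get_rx_queue(dev, ifq_idx);

        /* Callers holding a netdev ref may get here after we already
         * went thru shutdown via dev_memory_provider_uninstall().
         */
        if (dev->reg_state > NETREG_REGISTERED && !rxq->mp_params.mp_ops)
                return;

        if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
                         rxq->mp_params.mp_priv != old_p->mp_priv))
                return;

        rxq->mp_params.mp_ops = NULL;
        rxq->mp_params.mp_priv = NULL;
        err = netdev_rx_queue_restart(dev, ifq_idx);
        WARN_ON(err && err != -ENETDOWN);
}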