public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: Bobby Eshleman <bobbyeshleman@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Paolo Abeni <pabeni@redhat.com>, Simon Horman <horms@kernel.org>,
	Kuniyuki Iwashima <kuniyu@google.com>,
	Willem de Bruijn <willemb@google.com>,
	Neal Cardwell <ncardwell@google.com>,
	David Ahern <dsahern@kernel.org>,
	Mina Almasry <almasrymina@google.com>,
	Arnd Bergmann <arnd@arndb.de>, Jonathan Corbet <corbet@lwn.net>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	Shuah Khan <shuah@kernel.org>,
	Donald Hunter <donald.hunter@gmail.com>,
	Stanislav Fomichev <sdf@fomichev.me>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-kselftest@vger.kernel.org, asml.silence@gmail.com,
	matttbe@kernel.org, skhawaja@google.com,
	Bobby Eshleman <bobbyeshleman@meta.com>
Subject: Re: [PATCH net-next v10 3/5] net: devmem: implement autorelease token management
Date: Tue, 20 Jan 2026 17:00:42 -0800	[thread overview]
Message-ID: <20260120170042.43f038a2@kernel.org> (raw)
In-Reply-To: <20260115-scratch-bobbyeshleman-devmem-tcp-token-upstream-v10-3-686d0af71978@meta.com>

On Thu, 15 Jan 2026 21:02:14 -0800 Bobby Eshleman wrote:
> diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
> index 596c306ce52b..a5301b150663 100644
> --- a/Documentation/netlink/specs/netdev.yaml
> +++ b/Documentation/netlink/specs/netdev.yaml
> @@ -562,6 +562,17 @@ attribute-sets:
>          type: u32
>          checks:
>            min: 1
> +      -
> +        name: autorelease
> +        doc: |
> +          Token autorelease mode. If true (1), leaked tokens are automatically
> +          released when the socket closes. If false (0), leaked tokens are only
> +          released when the dmabuf is torn down. Once a binding is created with
> +          a specific mode, all subsequent bindings system-wide must use the
> +          same mode.
> +
> +          Optional. Defaults to false if not specified.
> +        type: u8

If you plan to have more values, use u32; if not, use a flag.
A u8 is an 8-bit value plus 24 bits of padding, so it's only useful for protocol fields.

>  operations:
>    list:
> @@ -769,6 +780,7 @@ operations:
>              - ifindex
>              - fd
>              - queues
> +            - autorelease
>          reply:
>            attributes:
>              - id

>  static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
> +static DEFINE_MUTEX(devmem_ar_lock);
> +DEFINE_STATIC_KEY_FALSE(tcp_devmem_ar_key);
> +EXPORT_SYMBOL(tcp_devmem_ar_key);

I don't think you need the export; perhaps move the helper in here in
the first place (while keeping the static inline wrapper when devmem=n)?

> +	if (autorelease)
> +		static_branch_enable(&tcp_devmem_ar_key);

This is user-controlled (non-root), right? So I think we need
the deferred version of the static key helpers.

> -	if (direction == DMA_TO_DEVICE) {
> -		binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> -					      sizeof(struct net_iov *),
> -					      GFP_KERNEL);
> -		if (!binding->vec) {
> -			err = -ENOMEM;
> -			goto err_unmap;
> -		}
> +	binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> +				      sizeof(struct net_iov *),
> +				      GFP_KERNEL | __GFP_ZERO);

make it a kvcalloc() while we're touching it, pls

> +	if (!binding->vec) {
> +		err = -ENOMEM;
> +		goto err_unmap;
>  	}
>  
>  	/* For simplicity we expect to make PAGE_SIZE allocations, but the
> @@ -306,25 +386,41 @@ net_devmem_bind_dmabuf(struct net_device *dev,
>  			niov = &owner->area.niovs[i];
>  			niov->type = NET_IOV_DMABUF;
>  			niov->owner = &owner->area;
> +			atomic_set(&niov->uref, 0);

Isn't it zeroed during alloc?

>  			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
>  						      net_devmem_get_dma_addr(niov));
> -			if (direction == DMA_TO_DEVICE)
> -				binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
> +			binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
>  		}
>  
>  		virtual += len;
>  	}
>  

> +	if (info->attrs[NETDEV_A_DMABUF_AUTORELEASE])
> +		autorelease =
> +			!!nla_get_u8(info->attrs[NETDEV_A_DMABUF_AUTORELEASE]);

Use nla_get_u8_default() here.

>  	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
>  	if (IS_ERR(priv))
>  		return PTR_ERR(priv);

> +static noinline_for_stack int
> +sock_devmem_dontneed_manual_release(struct sock *sk,
> +				    struct dmabuf_token *tokens,
> +				    unsigned int num_tokens)
> +{
> +	struct net_iov *niov;
> +	unsigned int i, j;
> +	netmem_ref netmem;
> +	unsigned int token;
> +	int num_frags = 0;
> +	int ret = 0;
> +
> +	if (!sk->sk_devmem_info.binding)
> +		return -EINVAL;
> +
> +	for (i = 0; i < num_tokens; i++) {
> +		for (j = 0; j < tokens[i].token_count; j++) {
> +			size_t size = sk->sk_devmem_info.binding->dmabuf->size;
> +
> +			token = tokens[i].token_start + j;
> +			if (token >= size / PAGE_SIZE)
> +				break;
> +
> +			if (++num_frags > MAX_DONTNEED_FRAGS)
> +				return ret;
> +
> +			niov = sk->sk_devmem_info.binding->vec[token];
> +			if (atomic_dec_and_test(&niov->uref)) {

Don't you need something like "atomic dec non-zero and test"?
refcount has refcount_dec_not_one() 🤔️

> +				netmem = net_iov_to_netmem(niov);
> +				WARN_ON_ONCE(!napi_pp_put_page(netmem));
> +			}
> +			ret++;
> +		}

>  frag_limit_reached:
> -	xa_unlock_bh(&sk->sk_user_frags);
> +	xa_unlock_bh(&sk->sk_devmem_info.frags);

may be worth separating the sk_devmem_info change out for clarity

>  	for (k = 0; k < netmem_num; k++)
>  		WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));

> @@ -2503,7 +2506,15 @@ void tcp_v4_destroy_sock(struct sock *sk)
>  
>  	tcp_release_user_frags(sk);
>  
> -	xa_destroy(&sk->sk_user_frags);
> +	if (!net_devmem_autorelease_enabled() && sk->sk_devmem_info.binding) {
> +		net_devmem_dmabuf_binding_user_put(sk->sk_devmem_info.binding);
> +		net_devmem_dmabuf_binding_put(sk->sk_devmem_info.binding);
> +		sk->sk_devmem_info.binding = NULL;
> +		WARN_ONCE(!xa_empty(&sk->sk_devmem_info.frags),
> +			  "non-empty xarray discovered in autorelease off mode");
> +	}
> +
> +	xa_destroy(&sk->sk_devmem_info.frags);

Let's wrap this up in a helper that'll live in devmem.c

  reply	other threads:[~2026-01-21  1:00 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-16  5:02 [PATCH net-next v10 0/5] net: devmem: improve cpu cost of RX token management Bobby Eshleman
2026-01-16  5:02 ` [PATCH net-next v10 1/5] net: devmem: rename tx_vec to vec in dmabuf binding Bobby Eshleman
2026-01-16  5:02 ` [PATCH net-next v10 2/5] net: devmem: refactor sock_devmem_dontneed for autorelease split Bobby Eshleman
2026-01-16  5:02 ` [PATCH net-next v10 3/5] net: devmem: implement autorelease token management Bobby Eshleman
2026-01-21  1:00   ` Jakub Kicinski [this message]
2026-01-21  5:33     ` Bobby Eshleman
2026-01-22  4:15   ` Mina Almasry
2026-01-22  5:18     ` Bobby Eshleman
2026-01-16  5:02 ` [PATCH net-next v10 4/5] net: devmem: document NETDEV_A_DMABUF_AUTORELEASE netlink attribute Bobby Eshleman
2026-01-21  0:36   ` Jakub Kicinski
2026-01-21  5:44     ` Bobby Eshleman
2026-01-22  1:35       ` Jakub Kicinski
2026-01-22  2:37         ` Bobby Eshleman
2026-01-22  2:50           ` Jakub Kicinski
2026-01-22  3:25             ` Bobby Eshleman
2026-01-22  3:46               ` Jakub Kicinski
2026-01-22  4:07                 ` Stanislav Fomichev
2026-01-27  1:26                   ` Jakub Kicinski
2026-01-27  2:30                     ` Bobby Eshleman
2026-01-27  2:44                       ` Jakub Kicinski
2026-01-27  3:06                         ` Bobby Eshleman
2026-01-27  3:43                           ` Jakub Kicinski
2026-01-27  3:50                             ` Bobby Eshleman
2026-01-16  5:02 ` [PATCH net-next v10 5/5] selftests: drv-net: devmem: add autorelease tests Bobby Eshleman
2026-01-21  1:07 ` [PATCH net-next v10 0/5] net: devmem: improve cpu cost of RX token management Jakub Kicinski
2026-01-21  5:29   ` Bobby Eshleman
2026-01-22  1:37     ` Jakub Kicinski
2026-01-22  4:21   ` Mina Almasry
2026-01-26 18:45     ` Bobby Eshleman
2026-01-27  1:31       ` Jakub Kicinski
2026-01-27  6:00         ` Stanislav Fomichev
2026-01-27  6:48           ` Bobby Eshleman
2026-01-30 11:13             ` Pavel Begunkov
2026-02-05  3:48               ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260120170042.43f038a2@kernel.org \
    --to=kuba@kernel.org \
    --cc=almasrymina@google.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=arnd@arndb.de \
    --cc=asml.silence@gmail.com \
    --cc=bobbyeshleman@gmail.com \
    --cc=bobbyeshleman@meta.com \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=donald.hunter@gmail.com \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kuniyu@google.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=matttbe@kernel.org \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=shuah@kernel.org \
    --cc=skhawaja@google.com \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox