From: Jacob Keller <jacob.e.keller@intel.com>
To: Eric Dumazet <edumazet@google.com>,
"David S . Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: David Ahern <dsahern@kernel.org>, Simon Horman <horms@kernel.org>,
<netdev@vger.kernel.org>, <eric.dumazet@gmail.com>
Subject: Re: [PATCH v2 net-next 4/4] inet: frags: save a pair of atomic operations in reassembly
Date: Thu, 13 Mar 2025 15:53:23 -0700 [thread overview]
Message-ID: <a4210935-7dc3-42fd-b8bd-68ea8151058e@intel.com> (raw)
In-Reply-To: <20250312082250.1803501-5-edumazet@google.com>
On 3/12/2025 1:22 AM, Eric Dumazet wrote:
> As mentioned in commit 648700f76b03 ("inet: frags:
> use rhashtables for reassembly units"):
>
> A followup patch will even remove the refcount hold/release
> left from prior implementation and save a couple of atomic
> operations.
>
> This patch implements this idea, seven years later.
>
Easy for things to slip through the cracks and get forgotten.
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
> net/ieee802154/6lowpan/reassembly.c | 5 ++++-
> net/ipv4/inet_fragment.c | 18 ++++++++----------
> net/ipv4/ip_fragment.c | 5 ++++-
> net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++++-
> net/ipv6/reassembly.c | 9 ++++-----
> 5 files changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
> index c5f86cff06ee7f95556955f994b859c86a315646..d4b983d170382e616ea58821b197da89885a6bb2 100644
> --- a/net/ieee802154/6lowpan/reassembly.c
> +++ b/net/ieee802154/6lowpan/reassembly.c
> @@ -304,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
> goto err;
> }
>
> + rcu_read_lock();
> fq = fq_find(net, cb, &hdr.source, &hdr.dest);
> if (fq != NULL) {
> - int ret, refs = 1;
> + int ret, refs = 0;
>
> spin_lock(&fq->q.lock);
> ret = lowpan_frag_queue(fq, skb, frag_type, &refs);
> spin_unlock(&fq->q.lock);
>
> + rcu_read_unlock();
> inet_frag_putn(&fq->q, refs);
> return ret;
> }
> + rcu_read_unlock();
>
> err:
> kfree_skb(skb);
> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> index 5eb18605001387e7f23b8661dc9f24a533ab1600..19fae4811ab2803bed2faa4900869f883cb3073c 100644
> --- a/net/ipv4/inet_fragment.c
> +++ b/net/ipv4/inet_fragment.c
> @@ -327,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
>
> timer_setup(&q->timer, f->frag_expire, 0);
> spin_lock_init(&q->lock);
> - refcount_set(&q->refcnt, 3);
> + /* One reference for the timer, one for the hash table. */
> + refcount_set(&q->refcnt, 2);
>
> return q;
> }
> @@ -349,7 +350,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
> *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
> &q->node, f->rhash_params);
> if (*prev) {
> - int refs = 2;
> + /* We could not insert in the hash table,
> + * we need to cancel what inet_frag_alloc()
> + * anticipated.
> + */
> + int refs = 1;
>
> q->flags |= INET_FRAG_COMPLETE;
> inet_frag_kill(q, &refs);
> @@ -359,7 +364,6 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
> return q;
> }
>
> -/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
> struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
> {
> /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
> @@ -369,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
> if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
> return NULL;
>
> - rcu_read_lock();
> -
> prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
> if (!prev)
> fq = inet_frag_create(fqdir, key, &prev);
> - if (!IS_ERR_OR_NULL(prev)) {
> + if (!IS_ERR_OR_NULL(prev))
> fq = prev;
> - if (!refcount_inc_not_zero(&fq->refcnt))
> - fq = NULL;
> - }
> - rcu_read_unlock();
> return fq;
> }
> EXPORT_SYMBOL(inet_frag_find);
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index c5f3c810706fb328c3d8a4d8501424df0dceaa8e..77f395b28ec748bcd85b8dfa0a8c0b8a74684103 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -483,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
> __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
>
> /* Lookup (or create) queue header */
> + rcu_read_lock();
> qp = ip_find(net, ip_hdr(skb), user, vif);
> if (qp) {
> - int ret, refs = 1;
> + int ret, refs = 0;
>
> spin_lock(&qp->q.lock);
>
> ret = ip_frag_queue(qp, skb, &refs);
>
> spin_unlock(&qp->q.lock);
> + rcu_read_unlock();
> inet_frag_putn(&qp->q, refs);
> return ret;
> }
> + rcu_read_unlock();
>
> __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
> kfree_skb(skb);
> diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
> index f33acb730dc5807205811c2675efd27a9ee99222..d6bd8f7079bb74ec99030201163332ed5c6d2eec 100644
> --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
> +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
> @@ -450,7 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
> struct frag_hdr *fhdr;
> struct frag_queue *fq;
> struct ipv6hdr *hdr;
> - int refs = 1;
> + int refs = 0;
> u8 prevhdr;
>
> /* Jumbo payload inhibits frag. header */
> @@ -477,9 +477,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
> hdr = ipv6_hdr(skb);
> fhdr = (struct frag_hdr *)skb_transport_header(skb);
>
> + rcu_read_lock();
> fq = fq_find(net, fhdr->identification, user, hdr,
> skb->dev ? skb->dev->ifindex : 0);
> if (fq == NULL) {
> + rcu_read_unlock();
> pr_debug("Can't find and can't create new queue\n");
> return -ENOMEM;
> }
> @@ -493,6 +495,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
> }
>
> spin_unlock_bh(&fq->q.lock);
> + rcu_read_unlock();
> inet_frag_putn(&fq->q, refs);
> return ret;
> }
> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
> index 7560380bd5871217d476f2e0e39332926c458bc1..49740898bc1370ff0ca89928750c6de85a45303f 100644
> --- a/net/ipv6/reassembly.c
> +++ b/net/ipv6/reassembly.c
> @@ -305,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
> skb_postpush_rcsum(skb, skb_network_header(skb),
> skb_network_header_len(skb));
>
> - rcu_read_lock();
> __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
> - rcu_read_unlock();
> fq->q.rb_fragments = RB_ROOT;
> fq->q.fragments_tail = NULL;
> fq->q.last_run_head = NULL;
> @@ -319,9 +317,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
> out_oom:
> net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
> out_fail:
> - rcu_read_lock();
> __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
> - rcu_read_unlock();
> inet_frag_kill(&fq->q, refs);
> return -1;
> }
> @@ -379,10 +375,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
> }
>
> iif = skb->dev ? skb->dev->ifindex : 0;
> + rcu_read_lock();
> fq = fq_find(net, fhdr->identification, hdr, iif);
> if (fq) {
> u32 prob_offset = 0;
> - int ret, refs = 1;
> + int ret, refs = 0;
>
> spin_lock(&fq->q.lock);
>
> @@ -391,6 +388,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
> &prob_offset, &refs);
>
> spin_unlock(&fq->q.lock);
> + rcu_read_unlock();
> inet_frag_putn(&fq->q, refs);
> if (prob_offset) {
> __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
> @@ -400,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
> }
> return ret;
> }
> + rcu_read_unlock();
>
> __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
> kfree_skb(skb);
next prev parent reply other threads:[~2025-03-13 22:54 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-12 8:22 [PATCH v2 net-next 0/4] inet: frags: fully use RCU Eric Dumazet
2025-03-12 8:22 ` [PATCH v2 net-next 1/4] inet: frags: add inet_frag_putn() helper Eric Dumazet
2025-03-12 8:22 ` [PATCH v2 net-next 2/4] ipv4: frags: remove ipq_put() Eric Dumazet
2025-03-12 8:22 ` [PATCH v2 net-next 3/4] inet: frags: change inet_frag_kill() to defer refcount updates Eric Dumazet
2025-03-12 8:22 ` [PATCH v2 net-next 4/4] inet: frags: save a pair of atomic operations in reassembly Eric Dumazet
2025-03-13 22:53 ` Jacob Keller [this message]
2025-03-18 12:30 ` [PATCH v2 net-next 0/4] inet: frags: fully use RCU patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a4210935-7dc3-42fd-b8bd-68ea8151058e@intel.com \
--to=jacob.e.keller@intel.com \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=eric.dumazet@gmail.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.