All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Gobert <richardbgobert@gmail.com>
To: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, corbet@lwn.net, yoshfuji@linux-ipv6.org,
	dsahern@kernel.org, alex.aring@gmail.com,
	stefan@datenfreihafen.org, pablo@netfilter.org,
	kadlec@netfilter.org, fw@strlen.de, kafai@fb.com,
	netdev@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-wpan@vger.kernel.org,
	netfilter-devel@vger.kernel.org, coreteam@netfilter.org
Subject: [PATCH 4/4] net-next: frags: dynamic timeout under load
Date: Mon, 29 Aug 2022 13:47:49 +0200	[thread overview]
Message-ID: <20220829114739.GA2436@debian> (raw)

Calculate a dynamic fragment reassembly timeout, taking into
consideration the current fqdir load and the load introduced by
the peer. Reintroduce low_thresh, which now acts as a knob for
adjusting per-peer memory limits.

Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
 Documentation/networking/ip-sysctl.rst |  3 +++
 include/net/inet_frag.h                |  1 +
 net/ipv4/inet_fragment.c               | 30 +++++++++++++++++++++++++-
 net/ipv4/ip_fragment.c                 |  2 +-
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 56cd4ea059b2..fb25aa6e22a2 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -247,6 +247,9 @@ ipfrag_low_thresh - LONG INTEGER
 	begins to remove incomplete fragment queues to free up resources.
 	The kernel still accepts new fragments for defragmentation.
 
+	(Since linux-6.1)
+	Maximum memory used to reassemble IP fragments sent by a single peer.
+
 ipfrag_time - INTEGER
 	Time in seconds to keep an IP fragment in memory.
 
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 077a0ec78a58..595a6db57a0e 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -99,6 +99,7 @@ struct inet_frag_queue {
 	u16			max_size;
 	struct fqdir		*fqdir;
 	struct inet_peer	*peer;
+	u64			timeout;
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 8b8d77d548d4..34c5ebba4951 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -314,6 +314,30 @@ void inet_frag_free(struct inet_frag_queue *q)
 	call_rcu(&q->rcu, inet_frag_destroy_rcu);
 }
 
+static int inet_frag_update_timeout(struct inet_frag_queue *q)
+{
+	u64 peer_timeout, inet_timeout;
+	long peer_mem, inet_mem;
+	long high_thresh = READ_ONCE(q->fqdir->high_thresh);
+	long low_thresh  = READ_ONCE(q->fqdir->low_thresh);
+	u64 base_timeout = READ_ONCE(q->fqdir->timeout);
+
+	peer_mem = low_thresh - peer_mem_limit(q);
+	inet_mem = high_thresh - frag_mem_limit(q->fqdir);
+
+	if (peer_mem <= 0 || inet_mem <= 0)
+		return -ENOMEM;
+
+	/* Timeout is base_timeout scaled by free/limit memory, per peer and
+	 * per fqdir. Take the larger (more permissive) of the two, to avoid
+	 * limiting the system while there is still enough memory.
+	 */
+	peer_timeout = div64_long(base_timeout * peer_mem, low_thresh);
+	inet_timeout = div64_long(base_timeout * inet_mem, high_thresh);
+	q->timeout = max_t(u64, peer_timeout, inet_timeout);
+	return 0;
+}
+
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
 	struct fqdir *fqdir;
@@ -346,6 +370,10 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
 
 	q->fqdir = fqdir;
 	f->constructor(q, arg);
+	if (inet_frag_update_timeout(q)) {
+		inet_frag_free(q);
+		return NULL;
+	}
 	add_frag_mem_limit(q, f->qsize);
 
 	timer_setup(&q->timer, f->frag_expire, 0);
@@ -367,7 +395,7 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 		*prev = ERR_PTR(-ENOMEM);
 		return NULL;
 	}
-	mod_timer(&q->timer, jiffies + fqdir->timeout);
+	mod_timer(&q->timer, jiffies + q->timeout);
 
 	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
 						 &q->node, f->rhash_params);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e35061f6aadb..88a99242d721 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -236,7 +236,7 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	unsigned int sum_truesize = 0;
 
-	if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
+	if (!mod_timer(&qp->q.timer, jiffies + qp->q.timeout)) {
 		refcount_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
-- 
2.36.1


             reply	other threads:[~2022-08-29 12:11 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-29 11:47 Richard Gobert [this message]
2022-08-29 17:15 ` [PATCH 4/4] net-next: frags: dynamic timeout under load Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220829114739.GA2436@debian \
    --to=richardbgobert@gmail.com \
    --cc=alex.aring@gmail.com \
    --cc=corbet@lwn.net \
    --cc=coreteam@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=kadlec@netfilter.org \
    --cc=kafai@fb.com \
    --cc=kuba@kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-wpan@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=pablo@netfilter.org \
    --cc=stefan@datenfreihafen.org \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.