[PATCH net-next v3 5/7] net: sched: pie: add more cases to auto-tune alpha and beta

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Leslie Monis <lesliemonis@gmail.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org,
	"Mohit P. Tahiliani" <tahiliani@nitk.edu.in>,
	Dave Taht <dave.taht@gmail.com>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Dhaval Khandla <dhavaljkhandla26@gmail.com>,
	Hrishikesh Hiraskar <hrishihiraskar@gmail.com>,
	Manish Kumar B <bmanish15597@gmail.com>,
	"Sachin D . Patil" <sdp.sachin@gmail.com>,
	Leslie Monis <lesliemonis@gmail.com>
Subject: [PATCH net-next v3 5/7] net: sched: pie: add more cases to auto-tune alpha and beta
Date: Tue, 26 Feb 2019 00:39:59 +0530	[thread overview]
Message-ID: <20190225191001.26797-6-lesliemonis@gmail.com> (raw)
In-Reply-To: <20190225191001.26797-1-lesliemonis@gmail.com>

From: "Mohit P. Tahiliani" <tahiliani@nitk.edu.in>

The current implementation scales the local alpha and beta
variables in the calculate_probability function by the same
amount for all values of drop probability below 1%.

RFC 8033 suggests using additional cases for auto-tuning
alpha and beta when the drop probability is less than 1%.

In order to add more auto-tuning cases, the drop probability
must be scaled to the u64 range instead of the u32 range (i.e.
MAX_PROB is widened from 32 to 64 bits) to prevent underflow
when scaling the local alpha and beta variables in the
calculate_probability function.

Signed-off-by: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
Signed-off-by: Dhaval Khandla <dhavaljkhandla26@gmail.com>
Signed-off-by: Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
Signed-off-by: Manish Kumar B <bmanish15597@gmail.com>
Signed-off-by: Sachin D. Patil <sdp.sachin@gmail.com>
Signed-off-by: Leslie Monis <lesliemonis@gmail.com>
Acked-by: Dave Taht <dave.taht@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/pkt_sched.h |  2 +-
 net/sched/sch_pie.c            | 65 +++++++++++++++++-----------------
 2 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 0d18b1d1fbbc..1eb572ef3f27 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -954,7 +954,7 @@ enum {
 #define TCA_PIE_MAX   (__TCA_PIE_MAX - 1)
 
 struct tc_pie_xstats {
-	__u32 prob;             /* current probability */
+	__u64 prob;             /* current probability */
 	__u32 delay;            /* current delay in ms */
 	__u32 avg_dq_rate;      /* current average dq_rate in bits/pie_time */
 	__u32 packets_in;       /* total number of packets enqueued */
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d88ab53593b3..30f158582499 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -33,7 +33,7 @@
 
 #define QUEUE_THRESHOLD 16384
 #define DQCOUNT_INVALID -1
-#define MAX_PROB  0xffffffff
+#define MAX_PROB 0xffffffffffffffff
 #define PIE_SCALE 8
 
 /* parameters used */
@@ -49,7 +49,7 @@ struct pie_params {
 
 /* variables used */
 struct pie_vars {
-	u32 prob;		/* probability but scaled by u32 limit. */
+	u64 prob;		/* probability but scaled by u64 limit. */
 	psched_time_t burst_time;
 	psched_time_t qdelay;
 	psched_time_t qdelay_old;
@@ -99,8 +99,8 @@ static void pie_vars_init(struct pie_vars *vars)
 static bool drop_early(struct Qdisc *sch, u32 packet_size)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
-	u32 rnd;
-	u32 local_prob = q->vars.prob;
+	u64 rnd;
+	u64 local_prob = q->vars.prob;
 	u32 mtu = psched_mtu(qdisc_dev(sch));
 
 	/* If there is still burst allowance left skip random early drop */
@@ -124,11 +124,11 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
 	 * probablity. Smaller packets will have lower drop prob in this case
 	 */
 	if (q->params.bytemode && packet_size <= mtu)
-		local_prob = (local_prob / mtu) * packet_size;
+		local_prob = (u64)packet_size * div_u64(local_prob, mtu);
 	else
 		local_prob = q->vars.prob;
 
-	rnd = prandom_u32();
+	prandom_bytes(&rnd, 8);
 	if (rnd < local_prob)
 		return true;
 
@@ -317,9 +317,10 @@ static void calculate_probability(struct Qdisc *sch)
 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
 	psched_time_t qdelay = 0;	/* in pschedtime */
 	psched_time_t qdelay_old = q->vars.qdelay;	/* in pschedtime */
-	s32 delta = 0;		/* determines the change in probability */
-	u32 oldprob;
-	u32 alpha, beta;
+	s64 delta = 0;		/* determines the change in probability */
+	u64 oldprob;
+	u64 alpha, beta;
+	u32 power;
 	bool update_prob = true;
 
 	q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +340,36 @@ static void calculate_probability(struct Qdisc *sch)
 	 * value for alpha as 0.125. In this implementation, we use values 0-32
 	 * passed from user space to represent this. Also, alpha and beta have
 	 * unit of HZ and need to be scaled before they can used to update
-	 * probability. alpha/beta are updated locally below by 1) scaling them
-	 * appropriately 2) scaling down by 16 to come to 0-2 range.
-	 * Please see paper for details.
-	 *
-	 * We scale alpha and beta differently depending on whether we are in
-	 * light, medium or high dropping mode.
+	 * probability. alpha/beta are updated locally below by scaling down
+	 * by 16 to come to 0-2 range.
 	 */
-	if (q->vars.prob < MAX_PROB / 100) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-	} else if (q->vars.prob < MAX_PROB / 10) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-	} else {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+	/* We scale alpha and beta differently depending on how heavy the
+	 * congestion is. Please see RFC 8033 for details.
+	 */
+	if (q->vars.prob < MAX_PROB / 10) {
+		alpha >>= 1;
+		beta >>= 1;
+
+		power = 100;
+		while (q->vars.prob < div_u64(MAX_PROB, power) &&
+		       power <= 1000000) {
+			alpha >>= 2;
+			beta >>= 2;
+			power *= 10;
+		}
 	}
 
 	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
-	delta += alpha * ((qdelay - q->params.target));
-	delta += beta * ((qdelay - qdelay_old));
+	delta += alpha * (u64)(qdelay - q->params.target);
+	delta += beta * (u64)(qdelay - qdelay_old);
 
 	oldprob = q->vars.prob;
 
 	/* to ensure we increase probability in steps of no more than 2% */
-	if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
 	    q->vars.prob >= MAX_PROB / 10)
 		delta = (MAX_PROB / 100) * 2;
 
-- 
2.17.1

next prev parent reply	other threads:[~2019-02-25 19:10 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-25 19:09 [PATCH net-next v3 0/7] net: sched: pie: align PIE implementation with RFC 8033 Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 1/7] net: sched: pie: change value of QUEUE_THRESHOLD Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 2/7] net: sched: pie: change default value of pie_params->target Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 3/7] net: sched: pie: change default value of pie_params->tupdate Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 4/7] net: sched: pie: change initial value of pie_vars->burst_time Leslie Monis
2019-02-25 19:09 ` Leslie Monis [this message]
2019-02-25 19:10 ` [PATCH net-next v3 6/7] net: sched: pie: add derandomization mechanism Leslie Monis
2019-02-25 19:10 ` [PATCH net-next v3 7/7] net: sched: pie: update references Leslie Monis
2019-02-25 22:21 ` [PATCH net-next v3 0/7] net: sched: pie: align PIE implementation with RFC 8033 David Miller
2019-02-26  0:38 ` Stephen Hemminger
2019-02-26  1:02   ` Dave Taht
2019-02-26  8:20   ` Leslie Monis
2019-02-26 15:50     ` Stephen Hemminger

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0d18b1d1fbb dfblob:1eb572ef3f2 dfblob:d88ab53593b
dfblob:30f15858249 )
 OR (
bs:"[PATCH net-next v3 5/7] net: sched: pie: add more cases to auto-tune alpha and beta" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190225191001.26797-6-lesliemonis@gmail.com \
    --to=lesliemonis@gmail.com \
    --cc=bmanish15597@gmail.com \
    --cc=dave.taht@gmail.com \
    --cc=davem@davemloft.net \
    --cc=dhavaljkhandla26@gmail.com \
    --cc=hrishihiraskar@gmail.com \
    --cc=jhs@mojatatu.com \
    --cc=netdev@vger.kernel.org \
    --cc=sdp.sachin@gmail.com \
    --cc=tahiliani@nitk.edu.in \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).