[PATCH net-next v3 5/7] net: sched: pie: add more cases to auto-tune alpha and beta

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Leslie Monis <lesliemonis@gmail.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org,
	"Mohit P. Tahiliani" <tahiliani@nitk.edu.in>,
	Dave Taht <dave.taht@gmail.com>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Dhaval Khandla <dhavaljkhandla26@gmail.com>,
	Hrishikesh Hiraskar <hrishihiraskar@gmail.com>,
	Manish Kumar B <bmanish15597@gmail.com>,
	"Sachin D . Patil" <sdp.sachin@gmail.com>,
	Leslie Monis <lesliemonis@gmail.com>
Subject: [PATCH net-next v3 5/7] net: sched: pie: add more cases to auto-tune alpha and beta
Date: Tue, 26 Feb 2019 00:39:59 +0530	[thread overview]
Message-ID: <20190225191001.26797-6-lesliemonis@gmail.com> (raw)
In-Reply-To: <20190225191001.26797-1-lesliemonis@gmail.com>

From: "Mohit P. Tahiliani" <tahiliani@nitk.edu.in>

The current implementation scales the local alpha and beta
variables in the calculate_probability function by the same
amount for all values of drop probability below 1%.

RFC 8033 suggests using additional cases for auto-tuning
alpha and beta when the drop probability is less than 1%.

In order to add more auto-tuning cases, MAX_PROB must be
scaled by u64 instead of u32 to prevent underflow when
scaling the local alpha and beta variables in the
calculate_probability function.

Signed-off-by: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
Signed-off-by: Dhaval Khandla <dhavaljkhandla26@gmail.com>
Signed-off-by: Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
Signed-off-by: Manish Kumar B <bmanish15597@gmail.com>
Signed-off-by: Sachin D. Patil <sdp.sachin@gmail.com>
Signed-off-by: Leslie Monis <lesliemonis@gmail.com>
Acked-by: Dave Taht <dave.taht@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/pkt_sched.h |  2 +-
 net/sched/sch_pie.c            | 65 +++++++++++++++++-----------------
 2 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 0d18b1d1fbbc..1eb572ef3f27 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -954,7 +954,7 @@ enum {
 #define TCA_PIE_MAX   (__TCA_PIE_MAX - 1)
 
 struct tc_pie_xstats {
-	__u32 prob;             /* current probability */
+	__u64 prob;             /* current probability */
 	__u32 delay;            /* current delay in ms */
 	__u32 avg_dq_rate;      /* current average dq_rate in bits/pie_time */
 	__u32 packets_in;       /* total number of packets enqueued */
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d88ab53593b3..30f158582499 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -33,7 +33,7 @@
 
 #define QUEUE_THRESHOLD 16384
 #define DQCOUNT_INVALID -1
-#define MAX_PROB  0xffffffff
+#define MAX_PROB 0xffffffffffffffff
 #define PIE_SCALE 8
 
 /* parameters used */
@@ -49,7 +49,7 @@ struct pie_params {
 
 /* variables used */
 struct pie_vars {
-	u32 prob;		/* probability but scaled by u32 limit. */
+	u64 prob;		/* probability but scaled by u64 limit. */
 	psched_time_t burst_time;
 	psched_time_t qdelay;
 	psched_time_t qdelay_old;
@@ -99,8 +99,8 @@ static void pie_vars_init(struct pie_vars *vars)
 static bool drop_early(struct Qdisc *sch, u32 packet_size)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
-	u32 rnd;
-	u32 local_prob = q->vars.prob;
+	u64 rnd;
+	u64 local_prob = q->vars.prob;
 	u32 mtu = psched_mtu(qdisc_dev(sch));
 
 	/* If there is still burst allowance left skip random early drop */
@@ -124,11 +124,11 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
 	 * probablity. Smaller packets will have lower drop prob in this case
 	 */
 	if (q->params.bytemode && packet_size <= mtu)
-		local_prob = (local_prob / mtu) * packet_size;
+		local_prob = (u64)packet_size * div_u64(local_prob, mtu);
 	else
 		local_prob = q->vars.prob;
 
-	rnd = prandom_u32();
+	prandom_bytes(&rnd, 8);
 	if (rnd < local_prob)
 		return true;
 
@@ -317,9 +317,10 @@ static void calculate_probability(struct Qdisc *sch)
 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
 	psched_time_t qdelay = 0;	/* in pschedtime */
 	psched_time_t qdelay_old = q->vars.qdelay;	/* in pschedtime */
-	s32 delta = 0;		/* determines the change in probability */
-	u32 oldprob;
-	u32 alpha, beta;
+	s64 delta = 0;		/* determines the change in probability */
+	u64 oldprob;
+	u64 alpha, beta;
+	u32 power;
 	bool update_prob = true;
 
 	q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +340,36 @@ static void calculate_probability(struct Qdisc *sch)
 	 * value for alpha as 0.125. In this implementation, we use values 0-32
 	 * passed from user space to represent this. Also, alpha and beta have
 	 * unit of HZ and need to be scaled before they can used to update
-	 * probability. alpha/beta are updated locally below by 1) scaling them
-	 * appropriately 2) scaling down by 16 to come to 0-2 range.
-	 * Please see paper for details.
-	 *
-	 * We scale alpha and beta differently depending on whether we are in
-	 * light, medium or high dropping mode.
+	 * probability. alpha/beta are updated locally below by scaling down
+	 * by 16 to come to 0-2 range.
 	 */
-	if (q->vars.prob < MAX_PROB / 100) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-	} else if (q->vars.prob < MAX_PROB / 10) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-	} else {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+	/* We scale alpha and beta differently depending on how heavy the
+	 * congestion is. Please see RFC 8033 for details.
+	 */
+	if (q->vars.prob < MAX_PROB / 10) {
+		alpha >>= 1;
+		beta >>= 1;
+
+		power = 100;
+		while (q->vars.prob < div_u64(MAX_PROB, power) &&
+		       power <= 1000000) {
+			alpha >>= 2;
+			beta >>= 2;
+			power *= 10;
+		}
 	}
 
 	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
-	delta += alpha * ((qdelay - q->params.target));
-	delta += beta * ((qdelay - qdelay_old));
+	delta += alpha * (u64)(qdelay - q->params.target);
+	delta += beta * (u64)(qdelay - qdelay_old);
 
 	oldprob = q->vars.prob;
 
 	/* to ensure we increase probability in steps of no more than 2% */
-	if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
 	    q->vars.prob >= MAX_PROB / 10)
 		delta = (MAX_PROB / 100) * 2;
 
-- 
2.17.1

next prev parent reply	other threads:[~2019-02-25 19:10 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-25 19:09 [PATCH net-next v3 0/7] net: sched: pie: align PIE implementation with RFC 8033 Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 1/7] net: sched: pie: change value of QUEUE_THRESHOLD Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 2/7] net: sched: pie: change default value of pie_params->target Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 3/7] net: sched: pie: change default value of pie_params->tupdate Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 4/7] net: sched: pie: change initial value of pie_vars->burst_time Leslie Monis
2019-02-25 19:09 ` Leslie Monis [this message]
2019-02-25 19:10 ` [PATCH net-next v3 6/7] net: sched: pie: add derandomization mechanism Leslie Monis
2019-02-25 19:10 ` [PATCH net-next v3 7/7] net: sched: pie: update references Leslie Monis
2019-02-25 22:21 ` [PATCH net-next v3 0/7] net: sched: pie: align PIE implementation with RFC 8033 David Miller
2019-02-26  0:38 ` Stephen Hemminger
2019-02-26  1:02   ` Dave Taht
2019-02-26  8:20   ` Leslie Monis
2019-02-26 15:50     ` Stephen Hemminger

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0d18b1d1fbb dfblob:1eb572ef3f2 dfblob:d88ab53593b
dfblob:30f15858249 )
 OR (
bs:"[PATCH net-next v3 5/7] net: sched: pie: add more cases to auto-tune alpha and beta" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190225191001.26797-6-lesliemonis@gmail.com \
    --to=lesliemonis@gmail.com \
    --cc=bmanish15597@gmail.com \
    --cc=dave.taht@gmail.com \
    --cc=davem@davemloft.net \
    --cc=dhavaljkhandla26@gmail.com \
    --cc=hrishihiraskar@gmail.com \
    --cc=jhs@mojatatu.com \
    --cc=netdev@vger.kernel.org \
    --cc=sdp.sachin@gmail.com \
    --cc=tahiliani@nitk.edu.in \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.