From: Leslie Monis <lesliemonis@gmail.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org,
"Mohit P. Tahiliani" <tahiliani@nitk.edu.in>,
Dave Taht <dave.taht@gmail.com>,
Jamal Hadi Salim <jhs@mojatatu.com>,
Dhaval Khandla <dhavaljkhandla26@gmail.com>,
Hrishikesh Hiraskar <hrishihiraskar@gmail.com>,
Manish Kumar B <bmanish15597@gmail.com>,
"Sachin D . Patil" <sdp.sachin@gmail.com>,
Leslie Monis <lesliemonis@gmail.com>
Subject: [PATCH net-next v3 5/7] net: sched: pie: add more cases to auto-tune alpha and beta
Date: Tue, 26 Feb 2019 00:39:59 +0530 [thread overview]
Message-ID: <20190225191001.26797-6-lesliemonis@gmail.com> (raw)
In-Reply-To: <20190225191001.26797-1-lesliemonis@gmail.com>
From: "Mohit P. Tahiliani" <tahiliani@nitk.edu.in>
The current implementation scales the local alpha and beta
variables in the calculate_probability function by the same
amount for all values of drop probability below 1%.
RFC 8033 suggests using additional cases for auto-tuning
alpha and beta when the drop probability is less than 1%.
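In order to add more auto-tuning cases, the drop probability
must be scaled to the u64 range instead of the u32 range
(MAX_PROB becomes 0xffffffffffffffff). This prevents the local
alpha and beta variables from underflowing when they are
scaled down further in the calculate_probability function.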
Signed-off-by: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
Signed-off-by: Dhaval Khandla <dhavaljkhandla26@gmail.com>
Signed-off-by: Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
Signed-off-by: Manish Kumar B <bmanish15597@gmail.com>
Signed-off-by: Sachin D. Patil <sdp.sachin@gmail.com>
Signed-off-by: Leslie Monis <lesliemonis@gmail.com>
Acked-by: Dave Taht <dave.taht@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
include/uapi/linux/pkt_sched.h | 2 +-
net/sched/sch_pie.c | 65 +++++++++++++++++-----------------
2 files changed, 33 insertions(+), 34 deletions(-)
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 0d18b1d1fbbc..1eb572ef3f27 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -954,7 +954,7 @@ enum {
#define TCA_PIE_MAX (__TCA_PIE_MAX - 1)
struct tc_pie_xstats {
- __u32 prob; /* current probability */
+ __u64 prob; /* current probability */
__u32 delay; /* current delay in ms */
__u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */
__u32 packets_in; /* total number of packets enqueued */
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d88ab53593b3..30f158582499 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -33,7 +33,7 @@
#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
-#define MAX_PROB 0xffffffff
+#define MAX_PROB 0xffffffffffffffff
#define PIE_SCALE 8
/* parameters used */
@@ -49,7 +49,7 @@ struct pie_params {
/* variables used */
struct pie_vars {
- u32 prob; /* probability but scaled by u32 limit. */
+ u64 prob; /* probability but scaled by u64 limit. */
psched_time_t burst_time;
psched_time_t qdelay;
psched_time_t qdelay_old;
@@ -99,8 +99,8 @@ static void pie_vars_init(struct pie_vars *vars)
static bool drop_early(struct Qdisc *sch, u32 packet_size)
{
struct pie_sched_data *q = qdisc_priv(sch);
- u32 rnd;
- u32 local_prob = q->vars.prob;
+ u64 rnd;
+ u64 local_prob = q->vars.prob;
u32 mtu = psched_mtu(qdisc_dev(sch));
/* If there is still burst allowance left skip random early drop */
@@ -124,11 +124,11 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
* probablity. Smaller packets will have lower drop prob in this case
*/
if (q->params.bytemode && packet_size <= mtu)
- local_prob = (local_prob / mtu) * packet_size;
+ local_prob = (u64)packet_size * div_u64(local_prob, mtu);
else
local_prob = q->vars.prob;
- rnd = prandom_u32();
+ prandom_bytes(&rnd, 8);
if (rnd < local_prob)
return true;
@@ -317,9 +317,10 @@ static void calculate_probability(struct Qdisc *sch)
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
- s32 delta = 0; /* determines the change in probability */
- u32 oldprob;
- u32 alpha, beta;
+ s64 delta = 0; /* determines the change in probability */
+ u64 oldprob;
+ u64 alpha, beta;
+ u32 power;
bool update_prob = true;
q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +340,36 @@ static void calculate_probability(struct Qdisc *sch)
* value for alpha as 0.125. In this implementation, we use values 0-32
* passed from user space to represent this. Also, alpha and beta have
* unit of HZ and need to be scaled before they can used to update
- * probability. alpha/beta are updated locally below by 1) scaling them
- * appropriately 2) scaling down by 16 to come to 0-2 range.
- * Please see paper for details.
- *
- * We scale alpha and beta differently depending on whether we are in
- * light, medium or high dropping mode.
+ * probability. alpha/beta are updated locally below by scaling down
+ * by 16 to come to 0-2 range.
*/
- if (q->vars.prob < MAX_PROB / 100) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- } else if (q->vars.prob < MAX_PROB / 10) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- } else {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+ /* We scale alpha and beta differently depending on how heavy the
+ * congestion is. Please see RFC 8033 for details.
+ */
+ if (q->vars.prob < MAX_PROB / 10) {
+ alpha >>= 1;
+ beta >>= 1;
+
+ power = 100;
+ while (q->vars.prob < div_u64(MAX_PROB, power) &&
+ power <= 1000000) {
+ alpha >>= 2;
+ beta >>= 2;
+ power *= 10;
+ }
}
/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
- delta += alpha * ((qdelay - q->params.target));
- delta += beta * ((qdelay - qdelay_old));
+ delta += alpha * (u64)(qdelay - q->params.target);
+ delta += beta * (u64)(qdelay - qdelay_old);
oldprob = q->vars.prob;
/* to ensure we increase probability in steps of no more than 2% */
- if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+ if (delta > (s64)(MAX_PROB / (100 / 2)) &&
q->vars.prob >= MAX_PROB / 10)
delta = (MAX_PROB / 100) * 2;
--
2.17.1
next prev parent reply other threads:[~2019-02-25 19:10 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-25 19:09 [PATCH net-next v3 0/7] net: sched: pie: align PIE implementation with RFC 8033 Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 1/7] net: sched: pie: change value of QUEUE_THRESHOLD Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 2/7] net: sched: pie: change default value of pie_params->target Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 3/7] net: sched: pie: change default value of pie_params->tupdate Leslie Monis
2019-02-25 19:09 ` [PATCH net-next v3 4/7] net: sched: pie: change initial value of pie_vars->burst_time Leslie Monis
2019-02-25 19:09 ` Leslie Monis [this message]
2019-02-25 19:10 ` [PATCH net-next v3 6/7] net: sched: pie: add derandomization mechanism Leslie Monis
2019-02-25 19:10 ` [PATCH net-next v3 7/7] net: sched: pie: update references Leslie Monis
2019-02-25 22:21 ` [PATCH net-next v3 0/7] net: sched: pie: align PIE implementation with RFC 8033 David Miller
2019-02-26 0:38 ` Stephen Hemminger
2019-02-26 1:02 ` Dave Taht
2019-02-26 8:20 ` Leslie Monis
2019-02-26 15:50 ` Stephen Hemminger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190225191001.26797-6-lesliemonis@gmail.com \
--to=lesliemonis@gmail.com \
--cc=bmanish15597@gmail.com \
--cc=dave.taht@gmail.com \
--cc=davem@davemloft.net \
--cc=dhavaljkhandla26@gmail.com \
--cc=hrishihiraskar@gmail.com \
--cc=jhs@mojatatu.com \
--cc=netdev@vger.kernel.org \
--cc=sdp.sachin@gmail.com \
--cc=tahiliani@nitk.edu.in \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).