* [PATCH 2/2] codel: split into multiple files
From: Michal Kazior @ 2016-04-22 12:15 UTC (permalink / raw)
To: netdev; +Cc: davem, johannes, Michal Kazior
In-Reply-To: <1461327359-21646-1-git-send-email-michal.kazior@tieto.com>
It was impossible to include codel.h for the
purpose of having access to codel_params or
codel_vars structure definitions and using them
for embedding in other more complex structures.
This splits allows codel.h itself to be treated
like any other header file while codel_qdisc.h and
codel_impl.h contain function definitions with
logic that was previously in codel.h.
This copies over copyrights and doesn't involve
code changes other than adding a few additional
include directives to net/sched/sch*codel.c.
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
---
include/net/codel.h | 223 ----------------------------------------
include/net/codel_impl.h | 255 ++++++++++++++++++++++++++++++++++++++++++++++
include/net/codel_qdisc.h | 73 +++++++++++++
net/sched/sch_codel.c | 2 +
net/sched/sch_fq_codel.c | 2 +
5 files changed, 332 insertions(+), 223 deletions(-)
create mode 100644 include/net/codel_impl.h
create mode 100644 include/net/codel_qdisc.h
diff --git a/include/net/codel.h b/include/net/codel.h
index 06ac687b4909..a6e428f80135 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -87,27 +87,6 @@ static inline codel_time_t codel_get_time(void)
((s32)((a) - (b)) >= 0))
#define codel_time_before_eq(a, b) codel_time_after_eq(b, a)
-/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */
-struct codel_skb_cb {
- codel_time_t enqueue_time;
-};
-
-static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb)
-{
- qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb));
- return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
-static codel_time_t codel_get_enqueue_time(const struct sk_buff *skb)
-{
- return get_codel_cb(skb)->enqueue_time;
-}
-
-static void codel_set_enqueue_time(struct sk_buff *skb)
-{
- get_codel_cb(skb)->enqueue_time = codel_get_time();
-}
-
static inline u32 codel_time_to_us(codel_time_t val)
{
u64 valns = ((u64)val << CODEL_SHIFT);
@@ -176,212 +155,10 @@ struct codel_stats {
#define CODEL_DISABLED_THRESHOLD INT_MAX
-static void codel_params_init(struct codel_params *params)
-{
- params->interval = MS2TIME(100);
- params->target = MS2TIME(5);
- params->ce_threshold = CODEL_DISABLED_THRESHOLD;
- params->ecn = false;
-}
-
-static void codel_vars_init(struct codel_vars *vars)
-{
- memset(vars, 0, sizeof(*vars));
-}
-
-static void codel_stats_init(struct codel_stats *stats)
-{
- stats->maxpacket = 0;
-}
-
-/*
- * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
- * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
- *
- * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
- */
-static void codel_Newton_step(struct codel_vars *vars)
-{
- u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
- u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
- u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2);
-
- val >>= 2; /* avoid overflow in following multiply */
- val = (val * invsqrt) >> (32 - 2 + 1);
-
- vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
-}
-
-/*
- * CoDel control_law is t + interval/sqrt(count)
- * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
- * both sqrt() and divide operation.
- */
-static codel_time_t codel_control_law(codel_time_t t,
- codel_time_t interval,
- u32 rec_inv_sqrt)
-{
- return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT);
-}
-
typedef u32 (*codel_skb_len_t)(const struct sk_buff *skb);
typedef codel_time_t (*codel_skb_time_t)(const struct sk_buff *skb);
typedef void (*codel_skb_drop_t)(struct sk_buff *skb, void *ctx);
typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars,
void *ctx);
-static bool codel_should_drop(const struct sk_buff *skb,
- void *ctx,
- struct codel_vars *vars,
- struct codel_params *params,
- struct codel_stats *stats,
- codel_skb_len_t skb_len_func,
- codel_skb_time_t skb_time_func,
- u32 *backlog,
- codel_time_t now)
-{
- bool ok_to_drop;
- u32 skb_len;
-
- if (!skb) {
- vars->first_above_time = 0;
- return false;
- }
-
- skb_len = skb_len_func(skb);
- vars->ldelay = now - skb_time_func(skb);
-
- if (unlikely(skb_len > stats->maxpacket))
- stats->maxpacket = skb_len;
-
- if (codel_time_before(vars->ldelay, params->target) ||
- *backlog <= params->mtu) {
- /* went below - stay below for at least interval */
- vars->first_above_time = 0;
- return false;
- }
- ok_to_drop = false;
- if (vars->first_above_time == 0) {
- /* just went above from below. If we stay above
- * for at least interval we'll say it's ok to drop
- */
- vars->first_above_time = now + params->interval;
- } else if (codel_time_after(now, vars->first_above_time)) {
- ok_to_drop = true;
- }
- return ok_to_drop;
-}
-
-static struct sk_buff *codel_dequeue(void *ctx,
- u32 *backlog,
- struct codel_params *params,
- struct codel_vars *vars,
- struct codel_stats *stats,
- codel_skb_len_t skb_len_func,
- codel_skb_time_t skb_time_func,
- codel_skb_drop_t drop_func,
- codel_skb_dequeue_t dequeue_func)
-{
- struct sk_buff *skb = dequeue_func(vars, ctx);
- codel_time_t now;
- bool drop;
-
- if (!skb) {
- vars->dropping = false;
- return skb;
- }
- now = codel_get_time();
- drop = codel_should_drop(skb, ctx, vars, params, stats,
- skb_len_func, skb_time_func, backlog, now);
- if (vars->dropping) {
- if (!drop) {
- /* sojourn time below target - leave dropping state */
- vars->dropping = false;
- } else if (codel_time_after_eq(now, vars->drop_next)) {
- /* It's time for the next drop. Drop the current
- * packet and dequeue the next. The dequeue might
- * take us out of dropping state.
- * If not, schedule the next drop.
- * A large backlog might result in drop rates so high
- * that the next drop should happen now,
- * hence the while loop.
- */
- while (vars->dropping &&
- codel_time_after_eq(now, vars->drop_next)) {
- vars->count++; /* dont care of possible wrap
- * since there is no more divide
- */
- codel_Newton_step(vars);
- if (params->ecn && INET_ECN_set_ce(skb)) {
- stats->ecn_mark++;
- vars->drop_next =
- codel_control_law(vars->drop_next,
- params->interval,
- vars->rec_inv_sqrt);
- goto end;
- }
- stats->drop_len += skb_len_func(skb);
- drop_func(skb, ctx);
- stats->drop_count++;
- skb = dequeue_func(vars, ctx);
- if (!codel_should_drop(skb, ctx,
- vars, params, stats,
- skb_len_func,
- skb_time_func,
- backlog, now)) {
- /* leave dropping state */
- vars->dropping = false;
- } else {
- /* and schedule the next drop */
- vars->drop_next =
- codel_control_law(vars->drop_next,
- params->interval,
- vars->rec_inv_sqrt);
- }
- }
- }
- } else if (drop) {
- u32 delta;
-
- if (params->ecn && INET_ECN_set_ce(skb)) {
- stats->ecn_mark++;
- } else {
- stats->drop_len += skb_len_func(skb);
- drop_func(skb, ctx);
- stats->drop_count++;
-
- skb = dequeue_func(vars, ctx);
- drop = codel_should_drop(skb, ctx, vars, params,
- stats, skb_len_func,
- skb_time_func, backlog, now);
- }
- vars->dropping = true;
- /* if min went above target close to when we last went below it
- * assume that the drop rate that controlled the queue on the
- * last cycle is a good starting point to control it now.
- */
- delta = vars->count - vars->lastcount;
- if (delta > 1 &&
- codel_time_before(now - vars->drop_next,
- 16 * params->interval)) {
- vars->count = delta;
- /* we dont care if rec_inv_sqrt approximation
- * is not very precise :
- * Next Newton steps will correct it quadratically.
- */
- codel_Newton_step(vars);
- } else {
- vars->count = 1;
- vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
- }
- vars->lastcount = vars->count;
- vars->drop_next = codel_control_law(now, params->interval,
- vars->rec_inv_sqrt);
- }
-end:
- if (skb && codel_time_after(vars->ldelay, params->ce_threshold) &&
- INET_ECN_set_ce(skb))
- stats->ce_mark++;
- return skb;
-}
#endif
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
new file mode 100644
index 000000000000..d289b91dcd65
--- /dev/null
+++ b/include/net/codel_impl.h
@@ -0,0 +1,255 @@
+#ifndef __NET_SCHED_CODEL_IMPL_H
+#define __NET_SCHED_CODEL_IMPL_H
+
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+/* Controlling Queue Delay (CoDel) algorithm
+ * =========================================
+ * Source : Kathleen Nichols and Van Jacobson
+ * http://queue.acm.org/detail.cfm?id=2209336
+ *
+ * Implemented on linux by Dave Taht and Eric Dumazet
+ */
+
+static void codel_params_init(struct codel_params *params)
+{
+ params->interval = MS2TIME(100);
+ params->target = MS2TIME(5);
+ params->ce_threshold = CODEL_DISABLED_THRESHOLD;
+ params->ecn = false;
+}
+
+static void codel_vars_init(struct codel_vars *vars)
+{
+ memset(vars, 0, sizeof(*vars));
+}
+
+static void codel_stats_init(struct codel_stats *stats)
+{
+ stats->maxpacket = 0;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+static void codel_Newton_step(struct codel_vars *vars)
+{
+ u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
+ u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
+ u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+
+ val >>= 2; /* avoid overflow in following multiply */
+ val = (val * invsqrt) >> (32 - 2 + 1);
+
+ vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
+}
+
+/*
+ * CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
+static codel_time_t codel_control_law(codel_time_t t,
+ codel_time_t interval,
+ u32 rec_inv_sqrt)
+{
+ return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT);
+}
+
+static bool codel_should_drop(const struct sk_buff *skb,
+ void *ctx,
+ struct codel_vars *vars,
+ struct codel_params *params,
+ struct codel_stats *stats,
+ codel_skb_len_t skb_len_func,
+ codel_skb_time_t skb_time_func,
+ u32 *backlog,
+ codel_time_t now)
+{
+ bool ok_to_drop;
+ u32 skb_len;
+
+ if (!skb) {
+ vars->first_above_time = 0;
+ return false;
+ }
+
+ skb_len = skb_len_func(skb);
+ vars->ldelay = now - skb_time_func(skb);
+
+ if (unlikely(skb_len > stats->maxpacket))
+ stats->maxpacket = skb_len;
+
+ if (codel_time_before(vars->ldelay, params->target) ||
+ *backlog <= params->mtu) {
+ /* went below - stay below for at least interval */
+ vars->first_above_time = 0;
+ return false;
+ }
+ ok_to_drop = false;
+ if (vars->first_above_time == 0) {
+ /* just went above from below. If we stay above
+ * for at least interval we'll say it's ok to drop
+ */
+ vars->first_above_time = now + params->interval;
+ } else if (codel_time_after(now, vars->first_above_time)) {
+ ok_to_drop = true;
+ }
+ return ok_to_drop;
+}
+
+static struct sk_buff *codel_dequeue(void *ctx,
+ u32 *backlog,
+ struct codel_params *params,
+ struct codel_vars *vars,
+ struct codel_stats *stats,
+ codel_skb_len_t skb_len_func,
+ codel_skb_time_t skb_time_func,
+ codel_skb_drop_t drop_func,
+ codel_skb_dequeue_t dequeue_func)
+{
+ struct sk_buff *skb = dequeue_func(vars, ctx);
+ codel_time_t now;
+ bool drop;
+
+ if (!skb) {
+ vars->dropping = false;
+ return skb;
+ }
+ now = codel_get_time();
+ drop = codel_should_drop(skb, ctx, vars, params, stats,
+ skb_len_func, skb_time_func, backlog, now);
+ if (vars->dropping) {
+ if (!drop) {
+ /* sojourn time below target - leave dropping state */
+ vars->dropping = false;
+ } else if (codel_time_after_eq(now, vars->drop_next)) {
+ /* It's time for the next drop. Drop the current
+ * packet and dequeue the next. The dequeue might
+ * take us out of dropping state.
+ * If not, schedule the next drop.
+ * A large backlog might result in drop rates so high
+ * that the next drop should happen now,
+ * hence the while loop.
+ */
+ while (vars->dropping &&
+ codel_time_after_eq(now, vars->drop_next)) {
+ vars->count++; /* dont care of possible wrap
+ * since there is no more divide
+ */
+ codel_Newton_step(vars);
+ if (params->ecn && INET_ECN_set_ce(skb)) {
+ stats->ecn_mark++;
+ vars->drop_next =
+ codel_control_law(vars->drop_next,
+ params->interval,
+ vars->rec_inv_sqrt);
+ goto end;
+ }
+ stats->drop_len += skb_len_func(skb);
+ drop_func(skb, ctx);
+ stats->drop_count++;
+ skb = dequeue_func(vars, ctx);
+ if (!codel_should_drop(skb, ctx,
+ vars, params, stats,
+ skb_len_func,
+ skb_time_func,
+ backlog, now)) {
+ /* leave dropping state */
+ vars->dropping = false;
+ } else {
+ /* and schedule the next drop */
+ vars->drop_next =
+ codel_control_law(vars->drop_next,
+ params->interval,
+ vars->rec_inv_sqrt);
+ }
+ }
+ }
+ } else if (drop) {
+ u32 delta;
+
+ if (params->ecn && INET_ECN_set_ce(skb)) {
+ stats->ecn_mark++;
+ } else {
+ stats->drop_len += skb_len_func(skb);
+ drop_func(skb, ctx);
+ stats->drop_count++;
+
+ skb = dequeue_func(vars, ctx);
+ drop = codel_should_drop(skb, ctx, vars, params,
+ stats, skb_len_func,
+ skb_time_func, backlog, now);
+ }
+ vars->dropping = true;
+ /* if min went above target close to when we last went below it
+ * assume that the drop rate that controlled the queue on the
+ * last cycle is a good starting point to control it now.
+ */
+ delta = vars->count - vars->lastcount;
+ if (delta > 1 &&
+ codel_time_before(now - vars->drop_next,
+ 16 * params->interval)) {
+ vars->count = delta;
+ /* we dont care if rec_inv_sqrt approximation
+ * is not very precise :
+ * Next Newton steps will correct it quadratically.
+ */
+ codel_Newton_step(vars);
+ } else {
+ vars->count = 1;
+ vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
+ }
+ vars->lastcount = vars->count;
+ vars->drop_next = codel_control_law(now, params->interval,
+ vars->rec_inv_sqrt);
+ }
+end:
+ if (skb && codel_time_after(vars->ldelay, params->ce_threshold) &&
+ INET_ECN_set_ce(skb))
+ stats->ce_mark++;
+ return skb;
+}
+
+#endif
diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h
new file mode 100644
index 000000000000..8144d9cd2908
--- /dev/null
+++ b/include/net/codel_qdisc.h
@@ -0,0 +1,73 @@
+#ifndef __NET_SCHED_CODEL_QDISC_H
+#define __NET_SCHED_CODEL_QDISC_H
+
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+/* Controlling Queue Delay (CoDel) algorithm
+ * =========================================
+ * Source : Kathleen Nichols and Van Jacobson
+ * http://queue.acm.org/detail.cfm?id=2209336
+ *
+ * Implemented on linux by Dave Taht and Eric Dumazet
+ */
+
+/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */
+struct codel_skb_cb {
+ codel_time_t enqueue_time;
+};
+
+static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb)
+{
+ qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb));
+ return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static codel_time_t codel_get_enqueue_time(const struct sk_buff *skb)
+{
+ return get_codel_cb(skb)->enqueue_time;
+}
+
+static void codel_set_enqueue_time(struct sk_buff *skb)
+{
+ get_codel_cb(skb)->enqueue_time = codel_get_time();
+}
+
+#endif
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 512a94abe351..dddf3bb65a32 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -49,6 +49,8 @@
#include <linux/prefetch.h>
#include <net/pkt_sched.h>
#include <net/codel.h>
+#include <net/codel_impl.h>
+#include <net/codel_qdisc.h>
#define DEFAULT_CODEL_LIMIT 1000
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index dcf7266e6901..a5e420b3d4ab 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -24,6 +24,8 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/codel.h>
+#include <net/codel_impl.h>
+#include <net/codel_qdisc.h>
/* Fair Queue CoDel.
*
--
2.1.4
^ permalink raw reply related
* [PATCH 1/2] codel: generalize the implementation
From: Michal Kazior @ 2016-04-22 12:15 UTC (permalink / raw)
To: netdev; +Cc: davem, johannes, Michal Kazior
In-Reply-To: <1461327359-21646-1-git-send-email-michal.kazior@tieto.com>
This strips out qdisc specific bits from the code
and makes it slightly more reusable. Codel will be
used by wireless/mac80211 in the future.
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
---
include/net/codel.h | 64 +++++++++++++++++++++++++++++-------------------
net/sched/sch_codel.c | 20 ++++++++++++---
net/sched/sch_fq_codel.c | 19 +++++++++++---
3 files changed, 71 insertions(+), 32 deletions(-)
diff --git a/include/net/codel.h b/include/net/codel.h
index d168aca115cc..06ac687b4909 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -176,12 +176,10 @@ struct codel_stats {
#define CODEL_DISABLED_THRESHOLD INT_MAX
-static void codel_params_init(struct codel_params *params,
- const struct Qdisc *sch)
+static void codel_params_init(struct codel_params *params)
{
params->interval = MS2TIME(100);
params->target = MS2TIME(5);
- params->mtu = psched_mtu(qdisc_dev(sch));
params->ce_threshold = CODEL_DISABLED_THRESHOLD;
params->ecn = false;
}
@@ -226,28 +224,38 @@ static codel_time_t codel_control_law(codel_time_t t,
return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT);
}
+typedef u32 (*codel_skb_len_t)(const struct sk_buff *skb);
+typedef codel_time_t (*codel_skb_time_t)(const struct sk_buff *skb);
+typedef void (*codel_skb_drop_t)(struct sk_buff *skb, void *ctx);
+typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars,
+ void *ctx);
+
static bool codel_should_drop(const struct sk_buff *skb,
- struct Qdisc *sch,
+ void *ctx,
struct codel_vars *vars,
struct codel_params *params,
struct codel_stats *stats,
+ codel_skb_len_t skb_len_func,
+ codel_skb_time_t skb_time_func,
+ u32 *backlog,
codel_time_t now)
{
bool ok_to_drop;
+ u32 skb_len;
if (!skb) {
vars->first_above_time = 0;
return false;
}
- vars->ldelay = now - codel_get_enqueue_time(skb);
- sch->qstats.backlog -= qdisc_pkt_len(skb);
+ skb_len = skb_len_func(skb);
+ vars->ldelay = now - skb_time_func(skb);
- if (unlikely(qdisc_pkt_len(skb) > stats->maxpacket))
- stats->maxpacket = qdisc_pkt_len(skb);
+ if (unlikely(skb_len > stats->maxpacket))
+ stats->maxpacket = skb_len;
if (codel_time_before(vars->ldelay, params->target) ||
- sch->qstats.backlog <= params->mtu) {
+ *backlog <= params->mtu) {
/* went below - stay below for at least interval */
vars->first_above_time = 0;
return false;
@@ -264,16 +272,17 @@ static bool codel_should_drop(const struct sk_buff *skb,
return ok_to_drop;
}
-typedef struct sk_buff * (*codel_skb_dequeue_t)(struct codel_vars *vars,
- struct Qdisc *sch);
-
-static struct sk_buff *codel_dequeue(struct Qdisc *sch,
+static struct sk_buff *codel_dequeue(void *ctx,
+ u32 *backlog,
struct codel_params *params,
struct codel_vars *vars,
struct codel_stats *stats,
+ codel_skb_len_t skb_len_func,
+ codel_skb_time_t skb_time_func,
+ codel_skb_drop_t drop_func,
codel_skb_dequeue_t dequeue_func)
{
- struct sk_buff *skb = dequeue_func(vars, sch);
+ struct sk_buff *skb = dequeue_func(vars, ctx);
codel_time_t now;
bool drop;
@@ -282,7 +291,8 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
return skb;
}
now = codel_get_time();
- drop = codel_should_drop(skb, sch, vars, params, stats, now);
+ drop = codel_should_drop(skb, ctx, vars, params, stats,
+ skb_len_func, skb_time_func, backlog, now);
if (vars->dropping) {
if (!drop) {
/* sojourn time below target - leave dropping state */
@@ -310,12 +320,15 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
vars->rec_inv_sqrt);
goto end;
}
- stats->drop_len += qdisc_pkt_len(skb);
- qdisc_drop(skb, sch);
+ stats->drop_len += skb_len_func(skb);
+ drop_func(skb, ctx);
stats->drop_count++;
- skb = dequeue_func(vars, sch);
- if (!codel_should_drop(skb, sch,
- vars, params, stats, now)) {
+ skb = dequeue_func(vars, ctx);
+ if (!codel_should_drop(skb, ctx,
+ vars, params, stats,
+ skb_len_func,
+ skb_time_func,
+ backlog, now)) {
/* leave dropping state */
vars->dropping = false;
} else {
@@ -333,13 +346,14 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
if (params->ecn && INET_ECN_set_ce(skb)) {
stats->ecn_mark++;
} else {
- stats->drop_len += qdisc_pkt_len(skb);
- qdisc_drop(skb, sch);
+ stats->drop_len += skb_len_func(skb);
+ drop_func(skb, ctx);
stats->drop_count++;
- skb = dequeue_func(vars, sch);
- drop = codel_should_drop(skb, sch, vars, params,
- stats, now);
+ skb = dequeue_func(vars, ctx);
+ drop = codel_should_drop(skb, ctx, vars, params,
+ stats, skb_len_func,
+ skb_time_func, backlog, now);
}
vars->dropping = true;
/* if min went above target close to when we last went below it
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 9b7e2980ee5c..512a94abe351 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -64,20 +64,33 @@ struct codel_sched_data {
* to dequeue a packet from queue. Note: backlog is handled in
* codel, we dont need to reduce it here.
*/
-static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
+static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
{
+ struct Qdisc *sch = ctx;
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ if (skb)
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+
prefetch(&skb->end); /* we'll need skb_shinfo() */
return skb;
}
+static void drop_func(struct sk_buff *skb, void *ctx)
+{
+ struct Qdisc *sch = ctx;
+
+ qdisc_drop(skb, sch);
+}
+
static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
{
struct codel_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
+ skb = codel_dequeue(sch, &sch->qstats.backlog, &q->params, &q->vars,
+ &q->stats, qdisc_pkt_len, codel_get_enqueue_time,
+ drop_func, dequeue_func);
/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
@@ -173,9 +186,10 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt)
sch->limit = DEFAULT_CODEL_LIMIT;
- codel_params_init(&q->params, sch);
+ codel_params_init(&q->params);
codel_vars_init(&q->vars);
codel_stats_init(&q->stats);
+ q->params.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
int err = codel_change(sch, opt);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index d3fc8f9dd3d4..dcf7266e6901 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -220,8 +220,9 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* to dequeue a packet from queue. Note: backlog is handled in
* codel, we dont need to reduce it here.
*/
-static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
+static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
{
+ struct Qdisc *sch = ctx;
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct fq_codel_flow *flow;
struct sk_buff *skb = NULL;
@@ -231,10 +232,18 @@ static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
skb = dequeue_head(flow);
q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb);
sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
}
return skb;
}
+static void drop_func(struct sk_buff *skb, void *ctx)
+{
+ struct Qdisc *sch = ctx;
+
+ qdisc_drop(skb, sch);
+}
+
static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -263,8 +272,9 @@ begin:
prev_ecn_mark = q->cstats.ecn_mark;
prev_backlog = sch->qstats.backlog;
- skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
- dequeue);
+ skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
+ &flow->cvars, &q->cstats, qdisc_pkt_len,
+ codel_get_enqueue_time, drop_func, dequeue_func);
flow->dropped += q->cstats.drop_count - prev_drop_count;
flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;
@@ -423,9 +433,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
q->perturbation = prandom_u32();
INIT_LIST_HEAD(&q->new_flows);
INIT_LIST_HEAD(&q->old_flows);
- codel_params_init(&q->cparams, sch);
+ codel_params_init(&q->cparams);
codel_stats_init(&q->cstats);
q->cparams.ecn = true;
+ q->cparams.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
int err = fq_codel_change(sch, opt);
--
2.1.4
^ permalink raw reply related
* [PATCH 0/2] codel: make it reuseable beyond qdiscs
From: Michal Kazior @ 2016-04-22 12:15 UTC (permalink / raw)
To: netdev; +Cc: davem, johannes, Michal Kazior
Hi,
There's an ongoing effort in fixing wireless
bufferbloat. As part of that fq_codel is being
ported into mac80211. To prevent code duplication
codel.h needs to be slightly modified before it
can be used in mac80211 (or other drivers FWIW).
For more background please see:
https://www.spinics.net/lists/linux-wireless/msg149976.html
Michal Kazior (2):
codel: generalize the implementation
codel: split into multiple files
include/net/codel.h | 217 +--------------------------------------
include/net/codel_impl.h | 255 ++++++++++++++++++++++++++++++++++++++++++++++
include/net/codel_qdisc.h | 73 +++++++++++++
net/sched/sch_codel.c | 22 +++-
net/sched/sch_fq_codel.c | 21 +++-
5 files changed, 368 insertions(+), 220 deletions(-)
create mode 100644 include/net/codel_impl.h
create mode 100644 include/net/codel_qdisc.h
--
2.1.4
^ permalink raw reply
* [PATCH net-next] macsec: Convert to using IFF_NO_QUEUE
From: Phil Sutter @ 2016-04-22 12:02 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Sabrina Dubroca
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
drivers/net/macsec.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 84d3e5ca88171..6caa72402de7d 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2826,7 +2826,7 @@ static void macsec_free_netdev(struct net_device *dev)
static void macsec_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->tx_queue_len = 0;
+ dev->priv_flags |= IFF_NO_QUEUE;
dev->netdev_ops = &macsec_netdev_ops;
dev->destructor = macsec_free_netdev;
--
2.8.0
^ permalink raw reply related
* [PATCH] cxgbi: fix uninitialized flowi6
From: Jiri Benc @ 2016-04-22 11:09 UTC (permalink / raw)
To: linux-scsi; +Cc: netdev, James E.J. Bottomley, Martin K. Petersen, Anish Bhatt
ip6_route_output looks into different fields in the passed flowi6 structure,
yet cxgbi passes garbage in nearly all those fields. Zero the structure out
first.
Fixes: fc8d0590d9142 ("libcxgbi: Add ipv6 api to driver")
Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
drivers/scsi/cxgbi/libcxgbi.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index f3bb7af4e984..ead83a24bcd1 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -688,6 +688,7 @@ static struct rt6_info *find_route_ipv6(const struct in6_addr *saddr,
{
struct flowi6 fl;
+ memset(&fl, 0, sizeof(fl));
if (saddr)
memcpy(&fl.saddr, saddr, sizeof(struct in6_addr));
if (daddr)
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next] route: move lwtunnel state to a single place
From: Jiri Benc @ 2016-04-22 10:40 UTC (permalink / raw)
To: netdev; +Cc: Lance Richardson
Commit 751a587ac9f9 ("route: fix breakage after moving lwtunnel state")
moved lwtstate to the end of dst_entry for 32bit archs. This makes it share
the cacheline with __refcnt which had an unkown effect on performance. For
this reason, the pointer was kept in place for 64bit archs.
However, later performance measurements showed this is of no concern. It
turns out that every performance sensitive path that accesses lwtstate
accesses also struct rtable or struct rt6_info which share the same cache
line.
Thus, to get rid of a few #ifdefs, move the field to the end of the struct
also for 64bit.
Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
Full analysis here: http://article.gmane.org/gmane.linux.network/375426
---
include/net/dst.h | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/include/net/dst.h b/include/net/dst.h
index 5c98443c1c9e..6835d224d47b 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -85,12 +85,11 @@ struct dst_entry {
#endif
#ifdef CONFIG_64BIT
- struct lwtunnel_state *lwtstate;
/*
* Align __refcnt to a 64 bytes alignment
* (L1_CACHE_SIZE would be too much)
*/
- long __pad_to_align_refcnt[1];
+ long __pad_to_align_refcnt[2];
#endif
/*
* __refcnt wants to be on a different cache line from
@@ -99,9 +98,7 @@ struct dst_entry {
atomic_t __refcnt; /* client references */
int __use;
unsigned long lastuse;
-#ifndef CONFIG_64BIT
struct lwtunnel_state *lwtstate;
-#endif
union {
struct dst_entry *next;
struct rtable __rcu *rt_next;
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH net v2 1/9] macsec: add missing NULL check after kmalloc
From: Lino Sanfilippo @ 2016-04-22 10:06 UTC (permalink / raw)
To: Sabrina Dubroca
Cc: netdev, Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter
In-Reply-To: <20160422094826.GA17059@bistromath.localdomain>
On 22.04.2016 11:48, Sabrina Dubroca wrote:
> 2016-04-22, 11:35:03 +0200, Lino Sanfilippo wrote:
>>
>>
>> On 22.04.2016 11:28, Sabrina Dubroca wrote:
>>> Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
>>> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
>>> Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
>>> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>>> ---
>>> drivers/net/macsec.c | 4 ++--
>>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
>>> index 84d3e5ca8817..f691030ee3df 100644
>>> --- a/drivers/net/macsec.c
>>> +++ b/drivers/net/macsec.c
>>> @@ -1622,8 +1622,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
>>> }
>>>
>>> rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL);
>>> - if (init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len,
>>> - secy->icv_len)) {
>>> + if (!rx_sa || init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
>>> + secy->key_len, secy->icv_len)) {
>>> rtnl_unlock();
>>> return -ENOMEM;
>>> }
>>
>>
>> In case that kmalloc was successful and init_rx_sa failed, the allocated memory should be freed, shouldnt it?.
>
> Yep, Lance pointed that out in v1:
> http://marc.info/?l=linux-netdev&m=146108796406155
>
> But since we have the same code with init_tx_sa, I decided to fix both
> in a separate patch (7/9 in this set).
>
Ah ok, sorry about the noise then :)
Regards,
Lino
^ permalink raw reply
* Re: [PATCH net v2 1/9] macsec: add missing NULL check after kmalloc
From: Sabrina Dubroca @ 2016-04-22 9:48 UTC (permalink / raw)
To: Lino Sanfilippo
Cc: netdev, Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter
In-Reply-To: <5719F047.80102@marvell.com>
2016-04-22, 11:35:03 +0200, Lino Sanfilippo wrote:
>
>
> On 22.04.2016 11:28, Sabrina Dubroca wrote:
> > Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
> > Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> > Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
> > Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> > ---
> > drivers/net/macsec.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
> > index 84d3e5ca8817..f691030ee3df 100644
> > --- a/drivers/net/macsec.c
> > +++ b/drivers/net/macsec.c
> > @@ -1622,8 +1622,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
> > }
> >
> > rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL);
> > - if (init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len,
> > - secy->icv_len)) {
> > + if (!rx_sa || init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
> > + secy->key_len, secy->icv_len)) {
> > rtnl_unlock();
> > return -ENOMEM;
> > }
>
>
> In case that kmalloc was successful and init_rx_sa failed, the allocated memory should be freed, shouldnt it?.
Yep, Lance pointed that out in v1:
http://marc.info/?l=linux-netdev&m=146108796406155
But since we have the same code with init_tx_sa, I decided to fix both
in a separate patch (7/9 in this set).
Thanks,
--
Sabrina
^ permalink raw reply
* Re: [PATCH net v2 1/9] macsec: add missing NULL check after kmalloc
From: Lino Sanfilippo @ 2016-04-22 9:35 UTC (permalink / raw)
To: Sabrina Dubroca, netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter
In-Reply-To: <d358a1ca10006d65c6d799e56d1abb7d17fda4a5.1461315621.git.sd@queasysnail.net>
On 22.04.2016 11:28, Sabrina Dubroca wrote:
> Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> ---
> drivers/net/macsec.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
> index 84d3e5ca8817..f691030ee3df 100644
> --- a/drivers/net/macsec.c
> +++ b/drivers/net/macsec.c
> @@ -1622,8 +1622,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
> }
>
> rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL);
> - if (init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len,
> - secy->icv_len)) {
> + if (!rx_sa || init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
> + secy->key_len, secy->icv_len)) {
> rtnl_unlock();
> return -ENOMEM;
> }
In case that kmalloc was successful and init_rx_sa failed, the allocated memory should be freed, shouldnt it?.
Regards,
Lino
^ permalink raw reply
* [PATCH net v2 9/9] macsec: fix netlink attribute validation
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
macsec_validate_attr should check IFLA_MACSEC_REPLAY_PROTECT (not
IFLA_MACSEC_PROTECT) to verify that the replay protection and replay
window arguments are correct.
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 9f63cc7b0a73..c6385617bfb2 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3147,8 +3147,8 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[])
nla_get_u8(data[IFLA_MACSEC_VALIDATION]) > MACSEC_VALIDATE_MAX)
return -EINVAL;
- if ((data[IFLA_MACSEC_PROTECT] &&
- nla_get_u8(data[IFLA_MACSEC_PROTECT])) &&
+ if ((data[IFLA_MACSEC_REPLAY_PROTECT] &&
+ nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT])) &&
!data[IFLA_MACSEC_WINDOW])
return -EINVAL;
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 8/9] macsec: add missing macsec prefix in uapi
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
I accidentally forgot some MACSEC_ prefixes in if_macsec.h.
Fixes: dece8d2b78d1 ("uapi: add MACsec bits")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 12 +++++++-----
include/uapi/linux/if_macsec.h | 4 ++--
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index b37d348b8ea0..9f63cc7b0a73 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2232,7 +2232,8 @@ static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb)
return 1;
if (nla_put_sci(skb, MACSEC_SECY_ATTR_SCI, secy->sci) ||
- nla_put_u64(skb, MACSEC_SECY_ATTR_CIPHER_SUITE, DEFAULT_CIPHER_ID) ||
+ nla_put_u64(skb, MACSEC_SECY_ATTR_CIPHER_SUITE,
+ MACSEC_DEFAULT_CIPHER_ID) ||
nla_put_u8(skb, MACSEC_SECY_ATTR_ICV_LEN, secy->icv_len) ||
nla_put_u8(skb, MACSEC_SECY_ATTR_OPER, secy->operational) ||
nla_put_u8(skb, MACSEC_SECY_ATTR_PROTECT, secy->protect_frames) ||
@@ -3096,7 +3097,7 @@ unregister:
static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[])
{
- u64 csid = DEFAULT_CIPHER_ID;
+ u64 csid = MACSEC_DEFAULT_CIPHER_ID;
u8 icv_len = DEFAULT_ICV_LEN;
int flag;
bool es, scb, sci;
@@ -3111,8 +3112,8 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[])
icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]);
switch (csid) {
- case DEFAULT_CIPHER_ID:
- case DEFAULT_CIPHER_ALT:
+ case MACSEC_DEFAULT_CIPHER_ID:
+ case MACSEC_DEFAULT_CIPHER_ALT:
if (icv_len < MACSEC_MIN_ICV_LEN ||
icv_len > MACSEC_MAX_ICV_LEN)
return -EINVAL;
@@ -3185,7 +3186,8 @@ static int macsec_fill_info(struct sk_buff *skb,
if (nla_put_sci(skb, IFLA_MACSEC_SCI, secy->sci) ||
nla_put_u8(skb, IFLA_MACSEC_ICV_LEN, secy->icv_len) ||
- nla_put_u64(skb, IFLA_MACSEC_CIPHER_SUITE, DEFAULT_CIPHER_ID) ||
+ nla_put_u64(skb, IFLA_MACSEC_CIPHER_SUITE,
+ MACSEC_DEFAULT_CIPHER_ID) ||
nla_put_u8(skb, IFLA_MACSEC_ENCODING_SA, tx_sc->encoding_sa) ||
nla_put_u8(skb, IFLA_MACSEC_ENCRYPT, tx_sc->encrypt) ||
nla_put_u8(skb, IFLA_MACSEC_PROTECT, secy->protect_frames) ||
diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h
index 26b0d1e3e3e7..4c58d9917aa4 100644
--- a/include/uapi/linux/if_macsec.h
+++ b/include/uapi/linux/if_macsec.h
@@ -19,8 +19,8 @@
#define MACSEC_MAX_KEY_LEN 128
-#define DEFAULT_CIPHER_ID 0x0080020001000001ULL
-#define DEFAULT_CIPHER_ALT 0x0080C20001000001ULL
+#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL
+#define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL
#define MACSEC_MIN_ICV_LEN 8
#define MACSEC_MAX_ICV_LEN 32
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 7/9] macsec: fix SA leak if initialization fails
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Reported-by: Lance Richardson <lrichard@redhat.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 826c6c9ce7fd..b37d348b8ea0 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1627,6 +1627,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL);
if (!rx_sa || init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
secy->key_len, secy->icv_len)) {
+ kfree(rx_sa);
rtnl_unlock();
return -ENOMEM;
}
@@ -1771,6 +1772,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
tx_sa = kmalloc(sizeof(*tx_sa), GFP_KERNEL);
if (!tx_sa || init_tx_sa(tx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
secy->key_len, secy->icv_len)) {
+ kfree(tx_sa);
rtnl_unlock();
return -ENOMEM;
}
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 6/9] macsec: fix memory leaks around rx_handler (un)registration
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
We leak a struct macsec_rxh_data when we unregister the rx_handler in
macsec_dellink.
We also leak a struct macsec_rxh_data in register_macsec_dev if we fail
to register the rx_handler.
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 41fbe556ba6d..826c6c9ce7fd 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2931,8 +2931,10 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head)
unregister_netdevice_queue(dev, head);
list_del_rcu(&macsec->secys);
- if (list_empty(&rxd->secys))
+ if (list_empty(&rxd->secys)) {
netdev_rx_handler_unregister(real_dev);
+ kfree(rxd);
+ }
macsec_del_dev(macsec);
}
@@ -2954,8 +2956,10 @@ static int register_macsec_dev(struct net_device *real_dev,
err = netdev_rx_handler_register(real_dev, macsec_handle_frame,
rxd);
- if (err < 0)
+ if (err < 0) {
+ kfree(rxd);
return err;
+ }
}
list_add_tail_rcu(&macsec->secys, &rxd->secys);
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 5/9] macsec: add consistency check to netlink dumps
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
Use genl_dump_check_consistent in dump_secy.
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Suggested-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 1fd2b147fda1..41fbe556ba6d 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2271,6 +2271,8 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev,
if (!hdr)
return -EMSGSIZE;
+ genl_dump_check_consistent(cb, hdr, &macsec_fam);
+
if (nla_put_u32(skb, MACSEC_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -2439,6 +2441,8 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int macsec_generation = 1; /* protected by RTNL */
+
static int macsec_dump_txsc(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -2449,6 +2453,9 @@ static int macsec_dump_txsc(struct sk_buff *skb, struct netlink_callback *cb)
d = 0;
rtnl_lock();
+
+ cb->seq = macsec_generation;
+
for_each_netdev(net, dev) {
struct macsec_secy *secy;
@@ -2920,6 +2927,8 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head)
struct net_device *real_dev = macsec->real_dev;
struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
+ macsec_generation++;
+
unregister_netdevice_queue(dev, head);
list_del_rcu(&macsec->secys);
if (list_empty(&rxd->secys))
@@ -3066,6 +3075,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
if (err < 0)
goto del_dev;
+ macsec_generation++;
+
dev_hold(real_dev);
return 0;
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 4/9] macsec: fix rx_sa refcounting with decrypt callback
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
The decrypt callback macsec_decrypt_done needs a reference on the rx_sa
and releases it before returning, but macsec_handle_frame already
put that reference after macsec_decrypt returned NULL.
Set rx_sa to NULL when the decrypt callback runs so that
macsec_handle_frame knows it must not release the reference.
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 2a2136b7d324..1fd2b147fda1 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -880,12 +880,12 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
macsec_skb_cb(skb)->valid = false;
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
- return NULL;
+ return ERR_PTR(-ENOMEM);
req = aead_request_alloc(rx_sa->key.tfm, GFP_ATOMIC);
if (!req) {
kfree_skb(skb);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
hdr = (struct macsec_eth_header *)skb->data;
@@ -905,7 +905,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
skb = skb_unshare(skb, GFP_ATOMIC);
if (!skb) {
aead_request_free(req);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
} else {
/* integrity only: all headers + data authenticated */
@@ -921,14 +921,14 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
dev_hold(dev);
ret = crypto_aead_decrypt(req);
if (ret == -EINPROGRESS) {
- return NULL;
+ return ERR_PTR(ret);
} else if (ret != 0) {
/* decryption/authentication failed
* 10.6 if validateFrames is disabled, deliver anyway
*/
if (ret != -EBADMSG) {
kfree_skb(skb);
- skb = NULL;
+ skb = ERR_PTR(ret);
}
} else {
macsec_skb_cb(skb)->valid = true;
@@ -1146,8 +1146,10 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
secy->validate_frames != MACSEC_VALIDATE_DISABLED)
skb = macsec_decrypt(skb, dev, rx_sa, sci, secy);
- if (!skb) {
- macsec_rxsa_put(rx_sa);
+ if (IS_ERR(skb)) {
+ /* the decrypt callback needs the reference */
+ if (PTR_ERR(skb) != -EINPROGRESS)
+ macsec_rxsa_put(rx_sa);
rcu_read_unlock();
*pskb = NULL;
return RX_HANDLER_CONSUMED;
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 3/9] macsec: don't put a NULL rxsa
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
The "deliver:" path of macsec_handle_frame can be called with
rx_sa == NULL. Check rx_sa != NULL before calling macsec_rxsa_put().
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 5f3ea8026074..2a2136b7d324 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1161,7 +1161,8 @@ deliver:
macsec_extra_len(macsec_skb_cb(skb)->has_sci));
macsec_reset_skb(skb, secy->netdev);
- macsec_rxsa_put(rx_sa);
+ if (rx_sa)
+ macsec_rxsa_put(rx_sa);
count_rx(dev, skb->len);
rcu_read_unlock();
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 2/9] macsec: take rtnl lock before for_each_netdev
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index f691030ee3df..5f3ea8026074 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2268,8 +2268,6 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev,
if (!hdr)
return -EMSGSIZE;
- rtnl_lock();
-
if (nla_put_u32(skb, MACSEC_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -2429,14 +2427,11 @@ static int dump_secy(struct macsec_secy *secy, struct net_device *dev,
nla_nest_end(skb, rxsc_list);
- rtnl_unlock();
-
genlmsg_end(skb, hdr);
return 0;
nla_put_failure:
- rtnl_unlock();
genlmsg_cancel(skb, hdr);
return -EMSGSIZE;
}
@@ -2450,6 +2445,7 @@ static int macsec_dump_txsc(struct sk_buff *skb, struct netlink_callback *cb)
dev_idx = cb->args[0];
d = 0;
+ rtnl_lock();
for_each_netdev(net, dev) {
struct macsec_secy *secy;
@@ -2467,6 +2463,7 @@ next:
}
done:
+ rtnl_unlock();
cb->args[0] = d;
return skb->len;
}
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 1/9] macsec: add missing NULL check after kmalloc
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
In-Reply-To: <cover.1461315621.git.sd@queasysnail.net>
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
drivers/net/macsec.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 84d3e5ca8817..f691030ee3df 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1622,8 +1622,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
}
rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL);
- if (init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len,
- secy->icv_len)) {
+ if (!rx_sa || init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
+ secy->key_len, secy->icv_len)) {
rtnl_unlock();
return -ENOMEM;
}
--
2.8.0
^ permalink raw reply related
* [PATCH net v2 0/9] macsec: a few fixes
From: Sabrina Dubroca @ 2016-04-22 9:28 UTC (permalink / raw)
To: netdev
Cc: Lance Richardson, Hannes Frederic Sowa, Johannes Berg,
Dan Carpenter, Sabrina Dubroca
Some small fixes for the macsec driver:
- possible NULL pointer dereferences
- netlink dumps fixes: RTNL locking, consistent dumps
- a reference counting bug
- wrong name for uapi constant
- a few memory leaks
Patches 1 to 5 are the same as in v1, patches 6 to 9 are new.
Patch 6 fixes the memleak that Lance spotted in v1.
Sabrina Dubroca (9):
macsec: add missing NULL check after kmalloc
macsec: take rtnl lock before for_each_netdev
macsec: don't put a NULL rxsa
macsec: fix rx_sa refcounting with decrypt callback
macsec: add consistency check to netlink dumps
macsec: fix memory leaks around rx_handler (un)registration
macsec: fix SA leak if initialization fails
macsec: add missing macsec prefix in uapi
macsec: fix netlink attribute validation
drivers/net/macsec.c | 65 +++++++++++++++++++++++++++---------------
include/uapi/linux/if_macsec.h | 4 +--
2 files changed, 44 insertions(+), 25 deletions(-)
--
2.8.0
^ permalink raw reply
* Re: linux-next: build failure after merge of the net-next tree
From: Mark Brown @ 2016-04-22 9:20 UTC (permalink / raw)
To: David Miller
Cc: sfr, netdev, linux-next, linux-kernel, mark.d.rustad,
jeffrey.t.kirsher, andrewx.bowers, kernel-build-reports,
linaro-kernel
In-Reply-To: <20160413.111513.295841881765244758.davem@davemloft.net>
[-- Attachment #1: Type: text/plain, Size: 816 bytes --]
On Wed, Apr 13, 2016 at 11:15:13AM -0400, David Miller wrote:
> From: Stephen Rothwell <sfr@canb.auug.org.au>
> > After merging the net-next tree, today's linux-next build (arm
> > allmodconfig) failed like thisi (this has actually been failing for a
> > few days, now):
> > ERROR: "__bad_udelay" [drivers/net/ethernet/intel/ixgbe/ixgbe.ko] undefined!
> > Caused by commit
> > 49425dfc7451 ("ixgbe: Add support for x550em_a 10G MAC type")
> > arm only allows udelay()s up to 2 milliseconds. This commit
> > adds a 5 ms udelay in ixgbe_acquire_swfw_sync_x550em_a() in
> > drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c.
> Jeff, please have your folks look into this. Probably just a simple
> conversion to mdelay().
This is still present, it's been breaking ARM allmodconfig builds for
about two weeks now.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 473 bytes --]
^ permalink raw reply
* Re: [RFC PATCH] gro: Partly revert "net: gro: allow to build full sized skb"
From: Steffen Klassert @ 2016-04-22 9:13 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Sowmini Varadhan, netdev
In-Reply-To: <1461243546.7627.15.camel@edumazet-glaptop3.roam.corp.google.com>
On Thu, Apr 21, 2016 at 05:59:06AM -0700, Eric Dumazet wrote:
> On Thu, 2016-04-21 at 09:40 +0200, Steffen Klassert wrote:
> >
> > Hi Eric, this is a followup on our discussion at the netdev
> > conference. Would you still be ok with this revert, or do
> > you think there is a better solution in sight?
>
> Note that some GRO enabled drivers would still generate frag_list.
>
> (This happens if they are using skb with some TCP payload in skb->head
> and skb->head was allocated with kmalloc())
>
> We have sysctl_max_skb_frags sysctl, we might have a sysctl
> enabling/disabling GRO from building any frag_list.
> Or simply reuse an existing one, like /proc/sys/net/ipv4/ip_forward ?)
Reusing the ipv4/ipv6 forwarding sysctls would be probably the
simplest solution, but maybe we can do the partial split in
the GSO layer that Alex proposed. Then we would not need to
change the way GRO builds the buffers.
>
> Here at Google, we increased MAX_SKB_FRAGS, but this is a rather
> intrusive change to be upstreamed :(
I've played here with MAX_SKB_FRAGS too, but it seems to
be device specific how many page fragments it can handle.
I wonder if we could increase MAX_SKB_FRAGS at the GRO
layer and let GSO split this buffer in something that
the transmitting device can handle?
^ permalink raw reply
* Re: [RFC PATCH] gro: Partly revert "net: gro: allow to build full sized skb"
From: Steffen Klassert @ 2016-04-22 8:51 UTC (permalink / raw)
To: Alexander Duyck; +Cc: Eric Dumazet, Sowmini Varadhan, Netdev
In-Reply-To: <CAKgT0Uc-MfZseKauJMW+xh+5LTEww6hcWmuXeCATsDA+Z-POsQ@mail.gmail.com>
On Thu, Apr 21, 2016 at 09:02:48AM -0700, Alexander Duyck wrote:
> On Thu, Apr 21, 2016 at 12:40 AM, Steffen Klassert
> <steffen.klassert@secunet.com> wrote:
> > This partly reverts the below mentioned patch because on
> > forwarding, such skbs can't be offloaded to a NIC.
> >
> > We need this to get IPsec GRO for forwarding to work properly,
> > otherwise the GRO aggregated packets get segmented again by
> > the GSO layer. Although discovered when implementing IPsec GRO,
> > this is a general problem in the forwarding path.
>
> I'm confused as to why you would need this to get IPsec GRO forwarding
> to work.
It works without this, but the performance numbers are not that good
if we have to do GSO in software.
> Are you having to go through a device that doesn't have
> NETIF_F_FRAGLIST defined?
I don't know of any NIC that can do TSO on a skbuff with fraglist,
that's why I try to avoid to have a buffer with fraglist.
> Also what is the issue with having to go
> through the GSO layer on segmentation? It seems like we might be able
> to do something like what we did with GSO partial to split frames so
> that they are in chunks that wouldn't require NETIF_F_FRAGLIST. Then
> you could get the best of both worlds in that the stack would only
> process one super-frame, and the transmitter could TSO a series of
> frames that are some fixed MSS in size.
This could be interesting. Then we could have a buffer with
fraglist, GSO layer splits in skbuffs without fraglist that
can be TSO offloaded. Something like this might solve my
performance problems.
^ permalink raw reply
* Re: [PATCH net-next] hv_netvsc: Fix the list processing for network change event
From: Vitaly Kuznetsov @ 2016-04-22 8:49 UTC (permalink / raw)
To: Haiyang Zhang; +Cc: olaf, netdev, driverdev-devel, linux-kernel, davem
In-Reply-To: <1461280381-17530-1-git-send-email-haiyangz@microsoft.com>
Haiyang Zhang <haiyangz@microsoft.com> writes:
> RNDIS_STATUS_NETWORK_CHANGE event is handled as two "half events" --
> media disconnect & connect. The second half should be added to the list
> head, not to the tail. So all events are processed in normal order.
>
Thanks,
this matters when we get some other events in between these two halves.
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
> ---
> drivers/net/hyperv/netvsc_drv.c | 2 +-
> 1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index bfdb568a..ba3f3f3 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -1125,7 +1125,7 @@ static void netvsc_link_change(struct work_struct *w)
> netif_tx_stop_all_queues(net);
> event->event = RNDIS_STATUS_MEDIA_CONNECT;
> spin_lock_irqsave(&ndev_ctx->lock, flags);
> - list_add_tail(&event->list, &ndev_ctx->reconfig_events);
> + list_add(&event->list, &ndev_ctx->reconfig_events);
> spin_unlock_irqrestore(&ndev_ctx->lock, flags);
> reschedule = true;
> }
--
Vitaly
^ permalink raw reply
* [PATCH net-next 09/10] net: hns: add attribute port-rst-offset for dsaf port node
From: Yisen Zhuang @ 2016-04-22 7:20 UTC (permalink / raw)
To: devicetree-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA
Cc: robh+dt-DgEjT+Ai2ygdnm+yROfE0A, pawel.moll-5wv7dgnIgG8,
mark.rutland-5wv7dgnIgG8, ijc+devicetree-KcIKpvwj1kUDXYZnReoRVg,
galak-sgV2jX0FEOL9JmXXK+q4OQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
yankejian-hv44wF8Li93QT0dZR+AlfA,
huangdaode-C8/M+/jPZTeaMJb+Lgu22Q,
salil.mehta-hv44wF8Li93QT0dZR+AlfA,
lipeng321-hv44wF8Li93QT0dZR+AlfA, liguozhu-hv44wF8Li93QT0dZR+AlfA,
xieqianqian-hv44wF8Li93QT0dZR+AlfA,
linuxarm-hv44wF8Li93QT0dZR+AlfA
In-Reply-To: <1461309619-167621-1-git-send-email-Yisen.Zhuang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
The reset offset for each port in a dsaf is different. The current code is
not so readability. This patch adds configuration named port-rst-offset to
make the code more simple and readability. If this attribute doesn't exist,
default value of this attribute is equal to its port index.
Signed-off-by: Yisen Zhuang <yisen.zhuang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
.../devicetree/bindings/net/hisilicon-hns-dsaf.txt | 2 ++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 10 +++++++++
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 1 +
drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 25 +++++++---------------
4 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index 0a1647e..2afc3fa 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -43,6 +43,8 @@ Required properties:
isn't cpld device.
- cpld-ctrl-reg: is cpld register offset. It is not required if there isn't
cpld-syscon.
+- port-rst-offset: is offset of reset field for each port in dsaf. Its value
+ depends on the hardware user manual.
[1] Documentation/devicetree/bindings/net/phy.txt
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 7073ca2..52d757d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -664,6 +664,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb)
mac_cb->max_frm = MAC_DEFAULT_MTU;
mac_cb->tx_pause_frm_time = MAC_DEFAULT_PAUSE_TIME;
+ mac_cb->port_rst_off = mac_cb->mac_id;
/* if the dsaf node doesn't contain a port subnode, get phy-handle
* from dsaf node
@@ -693,6 +694,15 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb)
}
mac_cb->serdes_ctrl = syscon;
+ ret = fwnode_property_read_u32(mac_cb->fw_port,
+ "port-rst-offset",
+ &mac_cb->port_rst_off);
+ if (ret) {
+ dev_dbg(mac_cb->dev,
+ "mac%d port-rst-offset not found, use default value.\n",
+ mac_cb->mac_id);
+ }
+
syscon = syscon_node_to_regmap(
of_parse_phandle(to_of_node(mac_cb->fw_port),
"cpld-syscon", 0));
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 719816b..7be7104 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -318,6 +318,7 @@ struct hns_mac_cb {
struct regmap *serdes_ctrl;
struct regmap *cpld_ctrl;
u32 cpld_ctrl_reg;
+ u32 port_rst_off;
struct mac_entry_idx addr_entry_idx[DSAF_MAX_VM_NUM];
u8 sfp_prsnt;
u8 cpld_led_value;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index c549aa8..e549a11 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -135,11 +135,7 @@ void hns_dsaf_xge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val)
return;
reg_val |= RESET_REQ_OR_DREQ;
-
- if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
- reg_val |= 0x2082082 << port;
- else
- reg_val |= 0x2082082 << (dsaf_dev->reset_offset + 6);
+ reg_val |= 0x2082082 << dsaf_dev->mac_cb[port]->port_rst_off;
if (val == 0)
reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
@@ -158,11 +154,8 @@ void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev,
if (port >= DSAF_XGE_NUM)
return;
- if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
- reg_val |= XGMAC_TRX_CORE_SRST_M << port;
- else
- reg_val |= XGMAC_TRX_CORE_SRST_M <<
- (dsaf_dev->reset_offset + 6);
+ reg_val |= XGMAC_TRX_CORE_SRST_M
+ << dsaf_dev->mac_cb[port]->port_rst_off;
if (val == 0)
reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
@@ -176,17 +169,19 @@ void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val)
{
u32 reg_val_1;
u32 reg_val_2;
+ u32 port_rst_off;
if (port >= DSAF_GE_NUM)
return;
if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
reg_val_1 = 0x1 << port;
+ port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off;
/* there is difference between V1 and V2 in register.*/
if (AE_IS_VER1(dsaf_dev->dsaf_ver))
- reg_val_2 = 0x1041041 << port;
+ reg_val_2 = 0x1041041 << port_rst_off;
else
- reg_val_2 = 0x2082082 << port;
+ reg_val_2 = 0x2082082 << port_rst_off;
if (val == 0) {
dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ1_REG,
@@ -226,11 +221,7 @@ void hns_ppe_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, u32 val)
u32 reg_val = 0;
u32 reg_addr;
- if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
- reg_val |= RESET_REQ_OR_DREQ << port;
- else
- reg_val |= RESET_REQ_OR_DREQ <<
- (dsaf_dev->reset_offset + 6);
+ reg_val |= RESET_REQ_OR_DREQ << dsaf_dev->mac_cb[port]->port_rst_off;
if (val == 0)
reg_addr = DSAF_SUB_SC_PPE_RESET_REQ_REG;
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH net-next 06/10] net: hns: sort the header file by alphabetical order
From: Yisen Zhuang @ 2016-04-22 7:20 UTC (permalink / raw)
To: devicetree-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA
Cc: robh+dt-DgEjT+Ai2ygdnm+yROfE0A, pawel.moll-5wv7dgnIgG8,
mark.rutland-5wv7dgnIgG8, ijc+devicetree-KcIKpvwj1kUDXYZnReoRVg,
galak-sgV2jX0FEOL9JmXXK+q4OQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
yankejian-hv44wF8Li93QT0dZR+AlfA,
huangdaode-C8/M+/jPZTeaMJb+Lgu22Q,
salil.mehta-hv44wF8Li93QT0dZR+AlfA,
lipeng321-hv44wF8Li93QT0dZR+AlfA, liguozhu-hv44wF8Li93QT0dZR+AlfA,
xieqianqian-hv44wF8Li93QT0dZR+AlfA,
linuxarm-hv44wF8Li93QT0dZR+AlfA
In-Reply-To: <1461309619-167621-1-git-send-email-Yisen.Zhuang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
From: Daode Huang <huangdaode-C8/M+/jPZTeaMJb+Lgu22Q@public.gmane.org>
This patch tunes the header file by the alphabetical order.
Signed-off-by: Daode Huang <huangdaode-C8/M+/jPZTeaMJb+Lgu22Q@public.gmane.org>
Signed-off-by: Yisen Zhuang <yisen.zhuang-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 12 ++++++------
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 12 ++++++------
drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 4 ++--
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 353b9e7..3730385 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -7,18 +7,18 @@
* (at your option) any later version.
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/phy_fixed.h>
#include <linux/interrupt.h>
-#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/phy_fixed.h>
+#include <linux/platform_device.h>
-#include "hns_dsaf_misc.h"
#include "hns_dsaf_main.h"
+#include "hns_dsaf_misc.h"
#include "hns_dsaf_rcb.h"
#define MAC_EN_FLAG_V 0xada0328
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index b418d42..98e0e83 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -7,22 +7,22 @@
* (at your option) any later version.
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
+#include <linux/device.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/netdevice.h>
-#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/device.h>
+#include <linux/platform_device.h>
#include <linux/vmalloc.h>
+#include "hns_dsaf_mac.h"
#include "hns_dsaf_main.h"
-#include "hns_dsaf_rcb.h"
#include "hns_dsaf_ppe.h"
-#include "hns_dsaf_mac.h"
+#include "hns_dsaf_rcb.h"
const char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
[DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf",
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 91e0382..67c8b9e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -7,10 +7,10 @@
* (at your option) any later version.
*/
-#include "hns_dsaf_misc.h"
#include "hns_dsaf_mac.h"
-#include "hns_dsaf_reg.h"
+#include "hns_dsaf_misc.h"
#include "hns_dsaf_ppe.h"
+#include "hns_dsaf_reg.h"
void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status,
u16 speed, int data)
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox