* [PATCH net-next 1/6] net: Add skb_get_hash_perturb
From: Tom Herbert @ 2015-03-04 18:39 UTC
To: davem, netdev, eric.dumazet, fw
This is used to get skb->hash and then perturb it for local use.
Signed-off-by: Tom Herbert <therbert@google.com>
---
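Note: a caller in a qdisc would look roughly like the sketch below
(hypothetical example, not part of this patch; my_sched_data,
q->perturbation and q->flows_cnt stand in for whatever state the qdisc
already keeps):

	static unsigned int example_hash(const struct my_sched_data *q,
					 struct sk_buff *skb)
	{
		/* Flow hash perturbed with a qdisc-private key; returns 0
		 * only when no flow hash could be computed for the skb.
		 */
		u32 hash = skb_get_hash_perturb(skb, q->perturbation);

		return reciprocal_scale(hash, q->flows_cnt);
	}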
include/linux/skbuff.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bba1330..10572b6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/compiler.h>
+#include <linux/jhash.h>
#include <linux/time.h>
#include <linux/bug.h>
#include <linux/cache.h>
@@ -920,6 +921,20 @@ static inline __u32 skb_get_hash(struct sk_buff *skb)
return skb->hash;
}
+static inline __u32 skb_get_hash_perturb(struct sk_buff *skb,
+ u32 perturb)
+{
+ u32 hash = skb_get_hash(skb);
+
+ if (likely(hash)) {
+ hash = jhash_1word((__force __u32) hash, perturb);
+ if (unlikely(!hash))
+ hash = 1;
+ }
+
+ return hash;
+}
+
static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
{
return skb->hash;
--
2.2.0.rc0.207.ga3a616c
* [PATCH net-next 2/6] sched: Eliminate use of flow_keys in sch_choke
From: Tom Herbert @ 2015-03-04 18:40 UTC
To: davem, netdev, eric.dumazet, fw
In choke_match_flow, compare skb_get_hash values for the two skbuffs
instead of explicitly matching flow keys after calling the flow dissector.
Signed-off-by: Tom Herbert <therbert@google.com>
---
net/sched/sch_choke.c | 32 ++------------------------------
1 file changed, 2 insertions(+), 30 deletions(-)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb9..8faa375 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,6 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
-#include <net/flow_keys.h>
/*
CHOKe stateless AQM for fair bandwidth allocation
@@ -133,16 +132,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
--sch->q.qlen;
}
-/* private part of skb->cb[] that a qdisc is allowed to use
- * is limited to QDISC_CB_PRIV_LEN bytes.
- * As a flow key might be too large, we store a part of it only.
- */
-#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
-
struct choke_skb_cb {
u16 classid;
- u8 keys_valid;
- u8 keys[QDISC_CB_PRIV_LEN - 3];
};
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -169,26 +160,8 @@ static u16 choke_get_classid(const struct sk_buff *skb)
static bool choke_match_flow(struct sk_buff *skb1,
struct sk_buff *skb2)
{
- struct flow_keys temp;
-
- if (skb1->protocol != skb2->protocol)
- return false;
-
- if (!choke_skb_cb(skb1)->keys_valid) {
- choke_skb_cb(skb1)->keys_valid = 1;
- skb_flow_dissect(skb1, &temp);
- memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
- }
-
- if (!choke_skb_cb(skb2)->keys_valid) {
- choke_skb_cb(skb2)->keys_valid = 1;
- skb_flow_dissect(skb2, &temp);
- memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
- }
-
- return !memcmp(&choke_skb_cb(skb1)->keys,
- &choke_skb_cb(skb2)->keys,
- CHOKE_K_LEN);
+ return (skb1->protocol == skb2->protocol &&
+ skb_get_hash(skb1) == skb_get_hash(skb2));
}
/*
@@ -279,7 +252,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
goto other_drop; /* Packet was eaten by filter */
}
- choke_skb_cb(skb)->keys_valid = 0;
/* Compute average queue usage (see RED) */
q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
if (red_is_idling(&q->vars))
--
2.2.0.rc0.207.ga3a616c
* [PATCH net-next 3/6] sched: Eliminate use of flow_keys in sch_fq_codel
From: Tom Herbert @ 2015-03-04 18:40 UTC
To: davem, netdev, eric.dumazet, fw
Call skb_get_hash_perturb instead of doing skb_flow_dissect and then
jhash by hand.
Signed-off-by: Tom Herbert <therbert@google.com>
---
net/sched/sch_fq_codel.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1e52dec..a6fc53d 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/codel.h>
/* Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
};
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
- const struct sk_buff *skb)
+ struct sk_buff *skb)
{
- struct flow_keys keys;
- unsigned int hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
+ u32 hash = skb_get_hash_perturb(skb, q->perturbation);
return reciprocal_scale(hash, q->flows_cnt);
}
--
2.2.0.rc0.207.ga3a616c
* [PATCH net-next 4/6] sched: Eliminate use of flow_keys in sch_hhf
From: Tom Herbert @ 2015-03-04 18:40 UTC
To: davem, netdev, eric.dumazet, fw
Call skb_get_hash_perturb instead of doing skb_flow_dissect and then
jhash by hand.
Signed-off-by: Tom Herbert <therbert@google.com>
---
net/sched/sch_hhf.c | 19 +------------------
1 file changed, 1 insertion(+), 18 deletions(-)
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aab..9d15cb6 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
-#include <net/flow_keys.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void)
return jiffies;
}
-static unsigned int skb_hash(const struct hhf_sched_data *q,
- const struct sk_buff *skb)
-{
- struct flow_keys keys;
- unsigned int hash;
-
- if (skb->sk && skb->sk->sk_hash)
- return skb->sk->sk_hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
- return hash;
-}
-
/* Looks up a heavy-hitter flow in a chaining list of table T. */
static struct hh_flow_state *seek_list(const u32 hash,
struct list_head *head,
@@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
}
/* Get hashed flow-id of the skb. */
- hash = skb_hash(q, skb);
+ hash = skb_get_hash_perturb(skb, q->perturbation);
/* Check if this packet belongs to an already established HH flow. */
flow_pos = hash & HHF_BIT_MASK;
--
2.2.0.rc0.207.ga3a616c
* [PATCH net-next 5/6] sched: Eliminate use of flow_keys in sch_sfb
From: Tom Herbert @ 2015-03-04 18:40 UTC
To: davem, netdev, eric.dumazet, fw
Call skb_get_hash_perturb instead of doing skb_flow_dissect and then
jhash by hand.
Signed-off-by: Tom Herbert <therbert@google.com>
---
net/sched/sch_sfb.c | 24 ++++++++----------------
1 file changed, 8 insertions(+), 16 deletions(-)
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd8..402d051 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,7 +26,6 @@
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#include <net/flow_keys.h>
/*
* SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
- u32 r, slot, salt, sfbhash;
+ u32 r, sfbhash;
+ u32 slot = q->slot;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- struct flow_keys keys;
if (unlikely(sch->q.qlen >= q->limit)) {
qdisc_qstats_overlimit(sch);
@@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
fl = rcu_dereference_bh(q->filter_list);
if (fl) {
+ u32 salt;
+
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
- keys.src = salt;
- keys.dst = 0;
- keys.ports = 0;
+ sfbhash = jhash_1word(salt, q->bins[q->slot].perturbation);
} else {
- skb_flow_dissect(skb, &keys);
+ sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
}
- slot = q->slot;
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(p_min >= SFB_MAX_PROB)) {
/* Inelastic flow */
if (q->double_buffering) {
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
+ sfbhash = skb_get_hash_perturb(skb,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
--
2.2.0.rc0.207.ga3a616c
* [PATCH net-next 6/6] sched: Eliminate use of flow_keys in sch_sfq
From: Tom Herbert @ 2015-03-04 18:40 UTC
To: davem, netdev, eric.dumazet, fw
Call skb_get_hash_perturb instead of doing skb_flow_dissect and then
jhash by hand.
Signed-off-by: Tom Herbert <therbert@google.com>
---
net/sched/sch_sfq.c | 29 +++--------------------------
1 file changed, 3 insertions(+), 26 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140..e7ed8a5 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/red.h>
@@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
return &q->dep[val - SFQ_MAX_FLOWS];
}
-/*
- * In order to be able to quickly rehash our queue when timer changes
- * q->perturbation, we store flow_keys in skb->cb[]
- */
-struct sfq_skb_cb {
- struct flow_keys keys;
-};
-
-static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
-{
- qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
- return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
static unsigned int sfq_hash(const struct sfq_sched_data *q,
- const struct sk_buff *skb)
+ struct sk_buff *skb)
{
- const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
- unsigned int hash;
-
- hash = jhash_3words((__force u32)keys->dst,
- (__force u32)keys->src ^ keys->ip_proto,
- (__force u32)keys->ports, q->perturbation);
- return hash & (q->divisor - 1);
+ return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return TC_H_MIN(skb->priority);
fl = rcu_dereference_bh(q->filter_list);
- if (!fl) {
- skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
+ if (!fl)
return sfq_hash(q, skb) + 1;
- }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tc_classify(skb, fl, &res);
--
2.2.0.rc0.207.ga3a616c
* Re: [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs
From: Eric Dumazet @ 2015-03-04 20:03 UTC
To: Tom Herbert; +Cc: davem, netdev, fw
On Wed, 2015-03-04 at 10:39 -0800, Tom Herbert wrote:
> - The flow keys are passed in cb[] structure. This severely limits
> our ability to increase the key space. For instance, the folding
> of IPv6 addresses in flow_dissector to make a 32-bit value
> represents a significant loss of information (it would be easy
> to create millions of different 4-tuples that would always produce the
> same hash value).
Yes, but then your patch is all about reducing flow compares to a single
u32 comparison in qdiscs (and elsewhere).
choke for example explicitly wants to make sure we drop a companion
if the incoming packet belongs to the same flow.
Relying on a 'strong hash', whatever that might be, was not considered in
the CHOKe paper. There is no mention of a stochastic match.
If we can no longer store the keys in skb->cb[], fine (although I claim
skb->cb[] size should be irrelevant, see our discussion on this topic
with Florian)
-> Just recompute the keys, using a local variable, from packet
content. Yes, it will be more expensive, but hey, we get what we want.
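Something like this untested sketch (using the skb_flow_dissect(skb, &keys)
helper these patches stop calling in the qdiscs, with a stack-local
compare, so skb->cb[] size indeed becomes irrelevant):

	static bool choke_match_flow(struct sk_buff *skb1, struct sk_buff *skb2)
	{
		struct flow_keys k1, k2;

		if (skb1->protocol != skb2->protocol)
			return false;

		/* Zero first so struct padding cannot defeat the memcmp */
		memset(&k1, 0, sizeof(k1));
		memset(&k2, 0, sizeof(k2));

		/* Recompute the keys on the stack, nothing cached in cb[] */
		skb_flow_dissect(skb1, &k1);
		skb_flow_dissect(skb2, &k2);

		return !memcmp(&k1, &k2, sizeof(k1));
	}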
Same for sfq: your skb_get_hash_perturb() doesn't address the point I
made earlier.
It is only giving a false sense of security.
I would rather not spread it.
(Note that there is no documentation or changelog to explain the
pro/cons)
I doubt OVS would condense their flow keys in a single u32...
* Re: [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs
From: Tom Herbert @ 2015-03-04 21:49 UTC
To: Eric Dumazet; +Cc: David Miller, Linux Netdev List, Florian Westphal
On Wed, Mar 4, 2015 at 12:03 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Wed, 2015-03-04 at 10:39 -0800, Tom Herbert wrote:
>
>> - The flow keys are passed in cb[] structure. This severely limits
>> our ability to increase the key space. For instance, the folding
>> of IPv6 addresses in flow_dissector to make a 32-bit value
>> represents a significant loss of information (it would be easy
>> to create millions of different 4-tuples that would always produce the
>> same hash value).
>
>
> Yes, but then your patch is all about reducing flow compares to a single
> u32 comparison in qdiscs (and elsewhere).
>
> choke for example explicitly wants to make sure we drop a companion
> if the incoming packet belongs to the same flow.
>
> Relying on a 'strong hash', whatever that might be, was not considered in
> the CHOKe paper. There is no mention of a stochastic match.
>
Then choke is broke :-) ipv6_addr_hash already makes the "flow" match
stochastic.
> If we can no longer store the keys in skb->cb[], fine (although I claim
> skb->cb[] size should be irrelevant, see our discussion on this topic
> with Florian)
> -> Just recompute the keys, using a local variable, from packet
> content. Yes, it will be more expensive, but hey, we get what we want.
>
> Same for sfq: your skb_get_hash_perturb() doesn't address the point I
> made earlier.
>
> It is only giving a false sense of security.
> I would rather not spread it.
> (Note that there is no documentation or changelog to explain the
> pro/cons)
>
> I doubt OVS would condense their flow keys in a single u32...
>
>
* Re: [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs
From: David Miller @ 2015-03-05 21:06 UTC
To: eric.dumazet; +Cc: therbert, netdev, fw
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 04 Mar 2015 12:03:03 -0800
> Yes, but then your patch is all about reducing flow compares to a single
> u32 comparison in qdiscs (and elsewhere).
>
> choke for example explicitly wants to make sure we drop a companion
> if the incoming packet belongs to the same flow.
>
> Relying on a 'strong hash', whatever that might be, was not considered in
> the CHOKe paper. There is no mention of a stochastic match.
>
> If we can no longer store the keys in skb->cb[], fine (although I claim
> skb->cb[] size should be irrelevant, see our discussion on this topic
> with Florian)
> -> Just recompute the keys, using a local variable, from packet
> content. Yes, it will be more expensive, but hey, we get what we want.
>
> Same for sfq: your skb_get_hash_perturb() doesn't address the point I
> made earlier.
>
> It is only giving a false sense of security.
> I would rather not spread it.
> (Note that there is no documentation or changelog to explain the
> pro/cons)
>
> I doubt OVS would condense their flow keys in a single u32...
I'm largely siding with Eric on this. And the Choke argument is
a strong one.
Therefore I'm deferring this series for now, more thought and work
is definitely needed.
* RE: [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs
From: David Laight @ 2015-03-05 11:13 UTC
To: 'Tom Herbert', davem@davemloft.net,
netdev@vger.kernel.org, eric.dumazet@gmail.com, fw@strlen.de
From: Tom Herbert
...
> - The probability that two different flows randomly match to the same
> hash from skb_get_hash is 1/2^32. If such a collision were to happen,
> then the flows potentially also map to the same qdisc queue in
> perpetuity despite perturbation being performed. Given the very low
> probability of this occurring, this may in practice only be an
> academic issue. If it is a real concern, rekeying of the underlying
> mechanisms of skb->hash could be done.
...
The probability of a collision is much higher than that.
With 10000 flows it is (if my 'ballpark maths' is right) nearer 1/100.
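(Spelling the ballpark out: with n flows and a uniform 32-bit hash, the
usual birthday approximation gives p ~= 1 - exp(-n(n-1)/2^33) ~= n^2/2^33,
so for n = 10000 that is about 0.012, i.e. roughly 1 in 85.)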
David
* Re: [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs
From: Tom Herbert @ 2015-03-05 17:19 UTC
To: David Laight
Cc: davem@davemloft.net, netdev@vger.kernel.org,
eric.dumazet@gmail.com, fw@strlen.de
On Thu, Mar 5, 2015 at 3:13 AM, David Laight <David.Laight@aculab.com> wrote:
> From: Tom Herbert
> ...
>> - The probability that two different flows randomly match to the same
>> hash from skb_get_hash is 1/2^32. If such a collision were to happen,
>> then the flows potentially also map to the same qdisc queue in
>> perpetuity despite perturbation being performed. Given the very low
>> probability of this occurring, this may in practice only be an
>> academic issue. If it is a real concern, rekeying of the underlying
>> mechanisms of skb->hash could be done.
> ...
>
> The probability of a collision is much higher than that.
> With 10000 flows it is (if my 'ballpark maths' is right) nearer 1/100.
>
Probably a little closer to 1/50. But this is the best case scenario
that assumes a completely random input. In reality the input could be
heavily biased, substantially increasing collisions. Consider if
we're in the forwarding path for two NVEs tunneling using VXLAN.
Encapsulated flows will differ in flow_keys only in source port with
14 bits of entropy. Now with 100 flows collision probability is 1/25,
at at 1000 flows you're pretty much guaranteed collisions. In such a
scenario, any amount of perturbation, re-keying of hashes, even
switching to more fancy hash algorithms doesn't help-- the root
problem is that the input domain does not contain sufficient entropy.
This is not just a performance issue, this is an obvious security
issue. It's fairly easy to DoS 5-tuples at will by spoofing packets
with different 5-tuples that hash to the same value. This is absolutely
trivial with IPv6: say we want to DoS a TCP connection with four-tuple
<S,D,s,d> whose source address is of the form w::x:y. We can just change
the source address to w::x^J:y^J and copy the rest of the four-tuple.
For any J this creates the same src value in flow_keys as w::x:y, hence
always the same hash. There are similar ways to spoof the 4-tuple with
IPv4 tunnels like the VXLAN case above.
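To make the aliasing concrete: the flow dissector folds each IPv6 address
with ipv6_addr_hash(), which in its generic form is just an XOR of the
four 32-bit words of the address, roughly:

	/* Simplified sketch of ipv6_addr_hash(): XOR-fold of the address */
	static inline u32 ipv6_addr_hash(const struct in6_addr *a)
	{
		return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^
				     a->s6_addr32[2] ^ a->s6_addr32[3]);
	}

so XORing the same J into any two words of the address leaves the folded
value, and hence the resulting flow hash, unchanged.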
So if we want to fix this, we need to address the limited information
used in flow_keys. 1) flow_keys needs more information such as full
IPv6 addresses, VLAN, GRE keyid, and flow label. 2) We need a hash
function that can take more bits of flow_keys as input to avoid the
aliasing problems like we see with the IPv6 case. This is where I am
going with these patches.
Tom
> David
>
* Re: [PATCH net-next 0/6] net: Call skb_get_hash in qdiscs
From: Eric Dumazet @ 2015-03-05 17:59 UTC
To: Tom Herbert
Cc: David Laight, davem@davemloft.net, netdev@vger.kernel.org,
fw@strlen.de
On Thu, 2015-03-05 at 09:19 -0800, Tom Herbert wrote:
> Probably a little closer to 1/50. But this is the best case scenario
> that assumes a completely random input. In reality the input could be
> heavily biased, substantially increasing collisions. Consider if
> we're in the forwarding path for two NVEs tunneling using VXLAN.
> Encapsulated flows will differ in flow_keys only in source port with
> 14 bits of entropy. Now with 100 flows collision probability is 1/25,
> and at 1000 flows you're pretty much guaranteed collisions. In such a
> scenario, any amount of perturbation, re-keying of hashes, even
> switching to more fancy hash algorithms doesn't help-- the root
> problem is that the input domain does not contain sufficient entropy.
>
> This is not just a performance issue, this is an obvious security
> issue. It's fairly easy to DoS 5-tuples at will by spoofing packets
> with different 5-tuples that hash to the same value. This is absolutely
> trivial with IPv6: say we want to DoS a TCP connection with four-tuple
> <S,D,s,d> whose source address is of the form w::x:y. We can just change
> the source address to w::x^J:y^J and copy the rest of the four-tuple.
> For any J this creates the same src value in flow_keys as w::x:y, hence
> always the same hash. There are similar ways to spoof the 4-tuple with
> IPv4 tunnels like the VXLAN case above.
>
> So if we want to fix this, we need to address the limited information
> used in flow_keys. 1) flow_keys needs more information such as full
> IPv6 addresses, VLAN, GRE keyid, and flow label. 2) We need a hash
> function that can take more bits of flow_keys as input to avoid the
> aliasing problems like we see with the IPv6 case. This is where I am
> going with these patches.
I thought we added ipv6_addr_jhash() for these reasons?
If you believe this issue is pressing, just change
flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
to use ipv6_addr_jhash()
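i.e. roughly (sketch only; ipv6_addr_jhash() takes an extra initval,
assumed here to be the flow dissector's hashrnd):

	flow->src = (__force __be32)ipv6_addr_jhash(&iph->saddr, hashrnd);
	flow->dst = (__force __be32)ipv6_addr_jhash(&iph->daddr, hashrnd);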
Note: So far, rxhash never has been a security issue.