Netdev List
 help / color / mirror / Atom feed
* [PATCH v7 5/6] syncookies: use SipHash in place of SHA1
From: Jason A. Donenfeld @ 2016-12-21 23:02 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, edumazet, Linus Torvalds,
	Eric Biggers, Tom Herbert, ak, davem, luto,
	Jean-Philippe Aumasson
  Cc: Jason A. Donenfeld, Eric Dumazet
In-Reply-To: <20161221230216.25341-1-Jason@zx2c4.com>

SHA1 is slower and less secure than SipHash, and so replacing syncookie
generation with SipHash makes natural sense. Some BSDs have been doing
this for several years in fact.

The speedup should be similar to -- and even more impressive than -- the
speedup from the sequence number fix in this series.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 20 ++++----------------
 net/ipv6/syncookies.c | 37 ++++++++++++++++---------------------
 2 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 3e88467d70ee..03bb068f8888 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -13,13 +13,13 @@
 #include <linux/tcp.h>
 #include <linux/slab.h>
 #include <linux/random.h>
-#include <linux/cryptohash.h>
+#include <linux/siphash.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <net/tcp.h>
 #include <net/route.h>
 
-static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
+static siphash_key_t syncookie_secret[2] __read_mostly;
 
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
@@ -48,24 +48,12 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
 #define TSBITS	6
 #define TSMASK	(((__u32)1 << TSBITS) - 1)
 
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch);
-
 static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
 		       u32 count, int c)
 {
-	__u32 *tmp;
-
 	net_get_random_once(syncookie_secret, sizeof(syncookie_secret));
-
-	tmp  = this_cpu_ptr(ipv4_cookie_scratch);
-	memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
-	tmp[0] = (__force u32)saddr;
-	tmp[1] = (__force u32)daddr;
-	tmp[2] = ((__force u32)sport << 16) + (__force u32)dport;
-	tmp[3] = count;
-	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
-
-	return tmp[17];
+	return siphash_4u32(saddr, daddr, (u32)sport << 16 | dport, count,
+			    syncookie_secret[c]);
 }
 
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index a4d49760bf43..be51fc0d99ad 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -16,7 +16,7 @@
 
 #include <linux/tcp.h>
 #include <linux/random.h>
-#include <linux/cryptohash.h>
+#include <linux/siphash.h>
 #include <linux/kernel.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
@@ -24,7 +24,7 @@
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
 
-static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
+static siphash_key_t syncookie6_secret[2] __read_mostly;
 
 /* RFC 2460, Section 8.3:
  * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
@@ -41,30 +41,25 @@ static __u16 const msstab[] = {
 	9000 - 60,
 };
 
-static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch);
-
 static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
 		       __be16 sport, __be16 dport, u32 count, int c)
 {
-	__u32 *tmp;
+	const struct {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		u32 count;
+		u16 sport;
+		u16 dport;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *saddr,
+		.daddr = *daddr,
+		.count = count,
+		.sport = sport,
+		.dport = dport
+	};
 
 	net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret));
-
-	tmp  = this_cpu_ptr(ipv6_cookie_scratch);
-
-	/*
-	 * we have 320 bits of information to hash, copy in the remaining
-	 * 192 bits required for sha_transform, from the syncookie6_secret
-	 * and overwrite the digest with the secret
-	 */
-	memcpy(tmp + 10, syncookie6_secret[c], 44);
-	memcpy(tmp, saddr, 16);
-	memcpy(tmp + 4, daddr, 16);
-	tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
-	tmp[9] = count;
-	sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
-
-	return tmp[17];
+	return siphash(&combined, offsetofend(typeof(combined), dport), syncookie6_secret[c]);
 }
 
 static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
-- 
2.11.0

^ permalink raw reply related

* [PATCH v7 6/6] siphash: implement HalfSipHash1-3 for hash tables
From: Jason A. Donenfeld @ 2016-12-21 23:02 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, edumazet, Linus Torvalds,
	Eric Biggers, Tom Herbert, ak, davem, luto,
	Jean-Philippe Aumasson
  Cc: Jason A. Donenfeld
In-Reply-To: <20161221230216.25341-1-Jason@zx2c4.com>

HalfSipHash, or hsiphash, is a shortened version of SipHash, which
generates 32-bit outputs using a weaker 64-bit key. It has *much* lower
security margins, and shouldn't be used for anything too sensitive, but
it could be used as a hashtable key function replacement, if the output
is never exposed, and if the security requirement is not too high.

The goal is to make this something that performance-critical jhash users
would be willing to use.

On 64-bit machines, HalfSipHash1-3 is slower than SipHash1-3, so on those
systems hsiphash() is implemented as SipHash1-3 instead.

64-bit x86_64:
[    0.509409] test_siphash:     SipHash2-4 cycles: 4049181
[    0.510650] test_siphash:     SipHash1-3 cycles: 2512884
[    0.512205] test_siphash: HalfSipHash1-3 cycles: 3429920
[    0.512904] test_siphash:    JenkinsHash cycles:  978267
So, we map hsiphash() -> SipHash1-3

32-bit x86:
[    0.509868] test_siphash:     SipHash2-4 cycles: 14812892
[    0.513601] test_siphash:     SipHash1-3 cycles:  9510710
[    0.515263] test_siphash: HalfSipHash1-3 cycles:  3856157
[    0.515952] test_siphash:    JenkinsHash cycles:  1148567
So, we map hsiphash() -> HalfSipHash1-3

hsiphash() is roughly 3 times slower than jhash(), but comes with a
considerable security improvement.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
---
 Documentation/siphash.txt |  75 +++++++++++
 include/linux/siphash.h   |  56 +++++++-
 lib/siphash.c             | 318 +++++++++++++++++++++++++++++++++++++++++++++-
 lib/test_siphash.c        | 139 ++++++++++++++++----
 4 files changed, 561 insertions(+), 27 deletions(-)

diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt
index 39ff7f0438e7..f93c1d7104c4 100644
--- a/Documentation/siphash.txt
+++ b/Documentation/siphash.txt
@@ -77,3 +77,78 @@ Linux implements the "2-4" variant of SipHash.
 
 Read the SipHash paper if you're interested in learning more:
 https://131002.net/siphash/siphash.pdf
+
+
+~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
+
+HalfSipHash - SipHash's insecure younger cousin
+-----------------------------------------------
+Written by Jason A. Donenfeld <jason@zx2c4.com>
+
+On the off-chance that SipHash is not fast enough for your needs, you might be
+able to justify using HalfSipHash, a terrifying but potentially useful
+possibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and,
+even scarier, uses an easily brute-forceable 64-bit key (with a 32-bit output)
+instead of SipHash's 128-bit key. However, this may appeal to some
+high-performance `jhash` users.
+
+Danger!
+
+Do not ever use HalfSipHash except as a hashtable key function, and only
+then when you can be absolutely certain that the outputs will never be
+transmitted out of the kernel. This is only remotely useful over `jhash` as a
+means of mitigating hashtable flooding denial of service attacks.
+
+1. Generating a key
+
+Keys should always be generated from a cryptographically secure source of
+random numbers, either using get_random_bytes or get_random_once:
+
+hsiphash_key_t key;
+get_random_bytes(key, sizeof(key));
+
+If you're not deriving your key from here, you're doing it wrong.
+
+2. Using the functions
+
+There are two variants of the function, one that takes a list of integers, and
+one that takes a buffer:
+
+u32 hsiphash(const void *data, size_t len, hsiphash_key_t key);
+
+And:
+
+u32 hsiphash_1u32(u32, hsiphash_key_t key);
+u32 hsiphash_2u32(u32, u32, hsiphash_key_t key);
+u32 hsiphash_3u32(u32, u32, u32, hsiphash_key_t key);
+u32 hsiphash_4u32(u32, u32, u32, u32, hsiphash_key_t key);
+
+If you pass the generic hsiphash function something of a constant length, it
+will constant fold at compile-time and automatically choose one of the
+optimized functions.
+
+3. Hashtable key function usage:
+
+struct some_hashtable {
+	DECLARE_HASHTABLE(hashtable, 8);
+	hsiphash_key_t key;
+};
+
+void init_hashtable(struct some_hashtable *table)
+{
+	get_random_bytes(table->key, sizeof(table->key));
+}
+
+static inline struct hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
+{
+	return &table->hashtable[hsiphash(input, sizeof(*input), table->key) & (HASH_SIZE(table->hashtable) - 1)];
+}
+
+You may then iterate like usual over the returned hash bucket.
+
+4. Performance
+
+HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
+this will not be a problem, as the hashtable lookup isn't the bottleneck. And
+in general, this is probably a good sacrifice to make for the security and DoS
+resistance of HalfSipHash.
diff --git a/include/linux/siphash.h b/include/linux/siphash.h
index 7aa666eb00d9..efab44c654f3 100644
--- a/include/linux/siphash.h
+++ b/include/linux/siphash.h
@@ -5,7 +5,9 @@
  * SipHash: a fast short-input PRF
  * https://131002.net/siphash/
  *
- * This implementation is specifically for SipHash2-4.
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
  */
 
 #ifndef _LINUX_SIPHASH_H
@@ -76,4 +78,56 @@ static inline u64 siphash(const void *data, size_t len, const siphash_key_t key)
 	return ___siphash_aligned(data, len, key);
 }
 
+#if BITS_PER_LONG == 64
+typedef siphash_key_t hsiphash_key_t;
+#define HSIPHASH_ALIGNMENT SIPHASH_ALIGNMENT
+#else
+typedef u32 hsiphash_key_t[2];
+#define HSIPHASH_ALIGNMENT __alignof__(u32)
+#endif
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t key);
+#endif
+
+u32 hsiphash_1u32(const u32 a, const hsiphash_key_t key);
+u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t key);
+u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
+		  const hsiphash_key_t key);
+u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
+		  const hsiphash_key_t key);
+
+static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, const hsiphash_key_t key)
+{
+	if (__builtin_constant_p(len) && len == 4)
+		return hsiphash_1u32(le32_to_cpu(data[0]), key);
+	if (__builtin_constant_p(len) && len == 8)
+		return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     key);
+	if (__builtin_constant_p(len) && len == 12)
+		return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     le32_to_cpu(data[2]), key);
+	if (__builtin_constant_p(len) && len == 16)
+		return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     le32_to_cpu(data[2]), le32_to_cpu(data[3]),
+				     key);
+	return __hsiphash_aligned(data, len, key);
+}
+
+/**
+ * hsiphash - compute 32-bit hsiphash PRF value
+ * @data: buffer to hash
+ * @len: size of @data
+ * @key: the hsiphash key
+ */
+static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+		return __hsiphash_unaligned(data, len, key);
+#endif
+	return ___hsiphash_aligned(data, len, key);
+}
+
 #endif /* _LINUX_SIPHASH_H */
diff --git a/lib/siphash.c b/lib/siphash.c
index ff2151313667..e2481226d96c 100644
--- a/lib/siphash.c
+++ b/lib/siphash.c
@@ -5,7 +5,9 @@
  * SipHash: a fast short-input PRF
  * https://131002.net/siphash/
  *
- * This implementation is specifically for SipHash2-4.
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
  */
 
 #include <linux/siphash.h>
@@ -230,3 +232,317 @@ u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
 	POSTAMBLE
 }
 EXPORT_SYMBOL(siphash_3u32);
+
+#if BITS_PER_LONG == 64
+/* Note that this HalfSipHash1-3 implementation on 64-bit
+ * isn't actually HalfSipHash1-3 but rather SipHash1-3. */
+
+#define HSIPROUND SIPROUND
+#define HPREAMBLE(len) PREAMBLE(len)
+#define HPOSTAMBLE \
+	v3 ^= b; \
+	HSIPROUND; \
+	v0 ^= b; \
+	v2 ^= 0xff; \
+	HSIPROUND; \
+	HSIPROUND; \
+	HSIPROUND; \
+	return (v0 ^ v1) ^ (v2 ^ v3);
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = le64_to_cpup(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= le32_to_cpup(data); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= le16_to_cpup(data); break;
+	case 1: b |= end[0];
+	}
+#endif
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = get_unaligned_le64(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= get_unaligned_le32(end); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= get_unaligned_le16(end); break;
+	case 1: b |= end[0];
+	}
+#endif
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t key)
+{
+	HPREAMBLE(4)
+	b |= first;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t key)
+{
+	u64 combined = (u64)second << 32 | first;
+	HPREAMBLE(8)
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+		  const hsiphash_key_t key)
+{
+	u64 combined = (u64)second << 32 | first;
+	HPREAMBLE(12)
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	b |= third;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: fourth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+		  const u32 forth, const hsiphash_key_t key)
+{
+	u64 combined = (u64)second << 32 | first;
+	HPREAMBLE(16)
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	combined = (u64)forth << 32 | third;
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#else
+#define HSIPROUND \
+	do { \
+	v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
+	v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
+	v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
+	v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
+	} while(0)
+
+#define HPREAMBLE(len) \
+	u32 v0 = 0; \
+	u32 v1 = 0; \
+	u32 v2 = 0x6c796765U; \
+	u32 v3 = 0x74656462U; \
+	u32 b = ((u32)len) << 24; \
+	v3 ^= key[1]; \
+	v2 ^= key[0]; \
+	v1 ^= key[1]; \
+	v0 ^= key[0];
+
+#define HPOSTAMBLE \
+	v3 ^= b; \
+	HSIPROUND; \
+	v0 ^= b; \
+	v2 ^= 0xff; \
+	HSIPROUND; \
+	HSIPROUND; \
+	HSIPROUND; \
+	return v1 ^ v3;
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t key)
+{
+	const u8 *end = data + len - (len % sizeof(u32));
+	const u8 left = len & (sizeof(u32) - 1);
+	u32 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u32)) {
+		m = le32_to_cpup(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+	switch (left) {
+	case 3: b |= ((u32)end[2]) << 16;
+	case 2: b |= le16_to_cpup(data); break;
+	case 1: b |= end[0];
+	}
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_aligned);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t key)
+{
+	const u8 *end = data + len - (len % sizeof(u32));
+	const u8 left = len & (sizeof(u32) - 1);
+	u32 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u32)) {
+		m = get_unaligned_le32(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+	switch (left) {
+	case 3: b |= ((u32)end[2]) << 16;
+	case 2: b |= get_unaligned_le16(end); break;
+	case 1: b |= end[0];
+	}
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(__hsiphash_unaligned);
+#endif
+
+/**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t key)
+{
+	HPREAMBLE(4)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_1u32);
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t key)
+{
+	HPREAMBLE(8)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	HSIPROUND;
+	v0 ^= second;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_2u32);
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+		  const hsiphash_key_t key)
+{
+	HPREAMBLE(12)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	HSIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	HSIPROUND;
+	v0 ^= third;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_3u32);
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: fourth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+		  const u32 forth, const hsiphash_key_t key)
+{
+	HPREAMBLE(16)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	HSIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	HSIPROUND;
+	v0 ^= third;
+	v3 ^= forth;
+	HSIPROUND;
+	v0 ^= forth;
+	HPOSTAMBLE
+}
+EXPORT_SYMBOL(hsiphash_4u32);
+#endif
diff --git a/lib/test_siphash.c b/lib/test_siphash.c
index e0ba2cf8dc67..ac291ec27fb6 100644
--- a/lib/test_siphash.c
+++ b/lib/test_siphash.c
@@ -7,7 +7,9 @@
  * SipHash: a fast short-input PRF
  * https://131002.net/siphash/
  *
- * This implementation is specifically for SipHash2-4.
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -18,10 +20,16 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 
-/* Test vectors taken from official reference source available at:
- *     https://131002.net/siphash/siphash24.c
+/* Test vectors taken from reference source available at:
+ *     https://github.com/veorq/SipHash
  */
-static const u64 test_vectors[64] = {
+
+
+
+static const siphash_key_t test_key_siphash =
+	{ 0x0706050403020100ULL , 0x0f0e0d0c0b0a0908ULL };
+
+static const u64 test_vectors_siphash[64] = {
 	0x726fdb47dd0e0e31ULL, 0x74f839c593dc67fdULL, 0x0d6c8009d9a94f5aULL,
 	0x85676696d7fb7e2dULL, 0xcf2794e0277187b7ULL, 0x18765564cd99a68dULL,
 	0xcbc9466e58fee3ceULL, 0xab0200f58b01d137ULL, 0x93f5f5799a932462ULL,
@@ -45,9 +53,64 @@ static const u64 test_vectors[64] = {
 	0x6ca4ecb15c5f91e1ULL, 0x9f626da15c9625f3ULL, 0xe51b38608ef25f57ULL,
 	0x958a324ceb064572ULL
 };
-static const siphash_key_t test_key =
+#if BITS_PER_LONG == 64
+static const hsiphash_key_t test_key_hsiphash =
 	{ 0x0706050403020100ULL , 0x0f0e0d0c0b0a0908ULL };
 
+static const u32 test_vectors_hsiphash[64] = {
+	0x050fc4dcU, 0x7d57ca93U, 0x4dc7d44dU,
+	0xe7ddf7fbU, 0x88d38328U, 0x49533b67U,
+	0xc59f22a7U, 0x9bb11140U, 0x8d299a8eU,
+	0x6c063de4U, 0x92ff097fU, 0xf94dc352U,
+	0x57b4d9a2U, 0x1229ffa7U, 0xc0f95d34U,
+	0x2a519956U, 0x7d908b66U, 0x63dbd80cU,
+	0xb473e63eU, 0x8d297d1cU, 0xa6cce040U,
+	0x2b45f844U, 0xa320872eU, 0xdae6c123U,
+	0x67349c8cU, 0x705b0979U, 0xca9913a5U,
+	0x4ade3b35U, 0xef6cd00dU, 0x4ab1e1f4U,
+	0x43c5e663U, 0x8c21d1bcU, 0x16a7b60dU,
+	0x7a8ff9bfU, 0x1f2a753eU, 0xbf186b91U,
+	0xada26206U, 0xa3c33057U, 0xae3a36a1U,
+	0x7b108392U, 0x99e41531U, 0x3f1ad944U,
+	0xc8138825U, 0xc28949a6U, 0xfaf8876bU,
+	0x9f042196U, 0x68b1d623U, 0x8b5114fdU,
+	0xdf074c46U, 0x12cc86b3U, 0x0a52098fU,
+	0x9d292f9aU, 0xa2f41f12U, 0x43a71ed0U,
+	0x73f0bce6U, 0x70a7e980U, 0x243c6d75U,
+	0xfdb71513U, 0xa67d8a08U, 0xb7e8f148U,
+	0xf7a644eeU, 0x0f1837f2U, 0x4b6694e0U,
+	0xb7bbb3a8U
+};
+#else
+static const hsiphash_key_t test_key_hsiphash =
+	{ 0x03020100U, 0x07060504U };
+
+static const u32 test_vectors_hsiphash[64] = {
+	0x5814c896U, 0xe7e864caU, 0xbc4b0e30U,
+	0x01539939U, 0x7e059ea6U, 0x88e3d89bU,
+	0xa0080b65U, 0x9d38d9d6U, 0x577999b1U,
+	0xc839caedU, 0xe4fa32cfU, 0x959246eeU,
+	0x6b28096cU, 0x66dd9cd6U, 0x16658a7cU,
+	0xd0257b04U, 0x8b31d501U, 0x2b1cd04bU,
+	0x06712339U, 0x522aca67U, 0x911bb605U,
+	0x90a65f0eU, 0xf826ef7bU, 0x62512debU,
+	0x57150ad7U, 0x5d473507U, 0x1ec47442U,
+	0xab64afd3U, 0x0a4100d0U, 0x6d2ce652U,
+	0x2331b6a3U, 0x08d8791aU, 0xbc6dda8dU,
+	0xe0f6c934U, 0xb0652033U, 0x9b9851ccU,
+	0x7c46fb7fU, 0x732ba8cbU, 0xf142997aU,
+	0xfcc9aa1bU, 0x05327eb2U, 0xe110131cU,
+	0xf9e5e7c0U, 0xa7d708a6U, 0x11795ab1U,
+	0x65671619U, 0x9f5fff91U, 0xd89c5267U,
+	0x007783ebU, 0x95766243U, 0xab639262U,
+	0x9c7e1390U, 0xc368dda6U, 0x38ddc455U,
+	0xfa13d379U, 0x979ea4e8U, 0x53ecd77eU,
+	0x2ee80657U, 0x33dbb66aU, 0xae3f0577U,
+	0x88b4c4ccU, 0x3e7f480bU, 0x74c1ebf8U,
+	0x87178304U
+};
+#endif
+
 static int __init siphash_test_init(void)
 {
 	u8 in[64] __aligned(SIPHASH_ALIGNMENT);
@@ -58,49 +121,75 @@ static int __init siphash_test_init(void)
 	for (i = 0; i < 64; ++i) {
 		in[i] = i;
 		in_unaligned[i + 1] = i;
-		if (siphash(in, i, test_key) != test_vectors[i]) {
-			pr_info("self-test aligned %u: FAIL\n", i + 1);
+		if (siphash(in, i, test_key_siphash) != test_vectors_siphash[i]) {
+			pr_info("siphash self-test aligned %u: FAIL\n", i + 1);
+			ret = -EINVAL;
+		}
+		if (siphash(in_unaligned + 1, i, test_key_siphash) != test_vectors_siphash[i]) {
+			pr_info("siphash self-test unaligned %u: FAIL\n", i + 1);
 			ret = -EINVAL;
 		}
-		if (siphash(in_unaligned + 1, i, test_key) != test_vectors[i]) {
-			pr_info("self-test unaligned %u: FAIL\n", i + 1);
+		if (hsiphash(in, i, test_key_hsiphash) != test_vectors_hsiphash[i]) {
+			pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1);
+			ret = -EINVAL;
+		}
+		if (hsiphash(in_unaligned + 1, i, test_key_hsiphash) != test_vectors_hsiphash[i]) {
+			pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1);
 			ret = -EINVAL;
 		}
 	}
-	if (siphash_1u64(0x0706050403020100ULL, test_key) != test_vectors[8]) {
-		pr_info("self-test 1u64: FAIL\n");
+	if (siphash_1u64(0x0706050403020100ULL, test_key_siphash) != test_vectors_siphash[8]) {
+		pr_info("siphash self-test 1u64: FAIL\n");
 		ret = -EINVAL;
 	}
-	if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, test_key) != test_vectors[16]) {
-		pr_info("self-test 2u64: FAIL\n");
+	if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, test_key_siphash) != test_vectors_siphash[16]) {
+		pr_info("siphash self-test 2u64: FAIL\n");
 		ret = -EINVAL;
 	}
 	if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
-			 0x1716151413121110ULL, test_key) != test_vectors[24]) {
-		pr_info("self-test 3u64: FAIL\n");
+			 0x1716151413121110ULL, test_key_siphash) != test_vectors_siphash[24]) {
+		pr_info("siphash self-test 3u64: FAIL\n");
 		ret = -EINVAL;
 	}
 	if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
-			 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL, test_key) != test_vectors[32]) {
-		pr_info("self-test 4u64: FAIL\n");
+			 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL, test_key_siphash) != test_vectors_siphash[32]) {
+		pr_info("siphash self-test 4u64: FAIL\n");
 		ret = -EINVAL;
 	}
-	if (siphash_1u32(0x03020100U, test_key) != test_vectors[4]) {
-		pr_info("self-test 1u32: FAIL\n");
+	if (siphash_1u32(0x03020100U, test_key_siphash) != test_vectors_siphash[4]) {
+		pr_info("siphash self-test 1u32: FAIL\n");
 		ret = -EINVAL;
 	}
-	if (siphash_2u32(0x03020100U, 0x07060504U, test_key) != test_vectors[8]) {
-		pr_info("self-test 2u32: FAIL\n");
+	if (siphash_2u32(0x03020100U, 0x07060504U, test_key_siphash) != test_vectors_siphash[8]) {
+		pr_info("siphash self-test 2u32: FAIL\n");
 		ret = -EINVAL;
 	}
 	if (siphash_3u32(0x03020100U, 0x07060504U,
-			 0x0b0a0908U, test_key) != test_vectors[12]) {
-		pr_info("self-test 3u32: FAIL\n");
+			 0x0b0a0908U, test_key_siphash) != test_vectors_siphash[12]) {
+		pr_info("siphash self-test 3u32: FAIL\n");
 		ret = -EINVAL;
 	}
 	if (siphash_4u32(0x03020100U, 0x07060504U,
-			 0x0b0a0908U, 0x0f0e0d0cU, test_key) != test_vectors[16]) {
-		pr_info("self-test 4u32: FAIL\n");
+			 0x0b0a0908U, 0x0f0e0d0cU, test_key_siphash) != test_vectors_siphash[16]) {
+		pr_info("siphash self-test 4u32: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (hsiphash_1u32(0x03020100U, test_key_hsiphash) != test_vectors_hsiphash[4]) {
+		pr_info("hsiphash self-test 1u32: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (hsiphash_2u32(0x03020100U, 0x07060504U, test_key_hsiphash) != test_vectors_hsiphash[8]) {
+		pr_info("hsiphash self-test 2u32: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (hsiphash_3u32(0x03020100U, 0x07060504U,
+			  0x0b0a0908U, test_key_hsiphash) != test_vectors_hsiphash[12]) {
+		pr_info("hsiphash self-test 3u32: FAIL\n");
+		ret = -EINVAL;
+	}
+	if (hsiphash_4u32(0x03020100U, 0x07060504U,
+			  0x0b0a0908U, 0x0f0e0d0cU, test_key_hsiphash) != test_vectors_hsiphash[16]) {
+		pr_info("hsiphash self-test 4u32: FAIL\n");
 		ret = -EINVAL;
 	}
 	if (!ret)
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH 2/2] net: wireless: fix to uses struct
From: Paul Bolle @ 2016-12-21 23:06 UTC (permalink / raw)
  To: Ozgur Karatas, johannes, David Miller
  Cc: Thomas Gleixner, linux-wireless, netdev, linux-kernel
In-Reply-To: <601101482360611-FqbOfpmrBZhxpj1cXAZ9Bg@public.gmane.org>

On Thu, 2016-12-22 at 01:50 +0300, Ozgur Karatas wrote:
> I don't have a problem with C programming

I'm sorry, but you do need to learn C, at a basic level, first.


Paul Bolle

^ permalink raw reply

* Re: [PATCH v7 3/6] random: use SipHash in place of MD5
From: Jason A. Donenfeld @ 2016-12-21 23:13 UTC (permalink / raw)
  To: Netdev, kernel-hardening, LKML, Linux Crypto Mailing List,
	David Laight, Ted Tso, Hannes Frederic Sowa, Eric Dumazet,
	Linus Torvalds, Eric Biggers, Tom Herbert, Andi Kleen,
	David Miller, Andy Lutomirski, Jean-Philippe Aumasson
  Cc: Jason A. Donenfeld
In-Reply-To: <20161221230216.25341-4-Jason@zx2c4.com>

Hi Ted,

On Thu, Dec 22, 2016 at 12:02 AM, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> This duplicates the current algorithm for get_random_int/long

I should have mentioned this directly in the commit message, which I
forgot to update: this v7 adds the time-based key rotation, which,
while not strictly necessary for ensuring the security of the RNG,
might help alleviate some concerns, as we talked about. Performance is
quite good on both 32-bit and 64-bit -- better than MD5 in both cases.

If you like this, terrific. If not, I'm happy to take this in whatever
direction you prefer, and implement whatever construction you think
best. There's been a lot of noise on this list about it; we can
continue to discuss more, or you can just tell me whatever you want to
do, and I'll implement it and that'll be the end of it. As you said,
we can always get something decent now and improve it later.

Alternatively, if you've decided in the end you prefer your batched
entropy approach using chacha, I'm happy to implement a polished
version of that here in this patch series (so that we can keep the `rm
lib/md5.c` commit.)

Just let me know how you'd like to proceed.

Thanks,
Jason

^ permalink raw reply

* Re: [PATCH 2/2] net: wireless: fix to uses struct
From: Ozgur Karatas @ 2016-12-21 23:16 UTC (permalink / raw)
  To: Paul Bolle, johannes, David Miller
  Cc: Thomas Gleixner, linux-wireless, netdev, linux-kernel
In-Reply-To: <1482361572.28171.8.camel@tiscali.nl>


22.12.2016, 01:06, "Paul Bolle" <pebolle@tiscali.nl>:
> On Thu, 2016-12-22 at 01:50 +0300, Ozgur Karatas wrote:
>>  I don't have a problem with C programming
>
> I'm sorry, but you do need to learn C, at a basic level, first.

Hmm, I don't like to discussion but I'm an assertive on C/C++.
So, I'm not into the Linux kernel, I writing code with C/C++ for many years. 
I'm having trouble using Linux tools and trying to learn git/diff/format-patch/etc. 

Also, I'm reading over 600 e-mails per day and I'm reading to Documentation (kernel). I learn :)

I don't have to problem with C, you can see my early codes and software (github).

I need to get a good sense of the coding style and Documentation.

And thank you.

Regards

> Paul Bolle

~Ozgur

^ permalink raw reply

* Re: [PATCH v7 3/6] random: use SipHash in place of MD5
From: Andy Lutomirski @ 2016-12-21 23:42 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Netdev, kernel-hardening@lists.openwall.com, LKML,
	Linux Crypto Mailing List, David Laight, Ted Tso,
	Hannes Frederic Sowa, Eric Dumazet, Linus Torvalds, Eric Biggers,
	Tom Herbert, Andi Kleen, David S. Miller, Jean-Philippe Aumasson
In-Reply-To: <20161221230216.25341-4-Jason@zx2c4.com>

On Wed, Dec 21, 2016 at 3:02 PM, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>  unsigned int get_random_int(void)
>  {
> -       __u32 *hash;
> -       unsigned int ret;
> -
> -       if (arch_get_random_int(&ret))
> -               return ret;
> -
> -       hash = get_cpu_var(get_random_int_hash);
> -
> -       hash[0] += current->pid + jiffies + random_get_entropy();
> -       md5_transform(hash, random_int_secret);
> -       ret = hash[0];
> -       put_cpu_var(get_random_int_hash);
> -
> -       return ret;
> +       unsigned int arch_result;
> +       u64 result;
> +       struct random_int_secret *secret;
> +
> +       if (arch_get_random_int(&arch_result))
> +               return arch_result;
> +
> +       secret = get_random_int_secret();
> +       result = siphash_3u64(secret->chaining, jiffies,
> +                             (u64)random_get_entropy() + current->pid,
> +                             secret->secret);
> +       secret->chaining += result;
> +       put_cpu_var(secret);
> +       return result;
>  }
>  EXPORT_SYMBOL(get_random_int);

Hmm.  I haven't tried to prove anything for real.  But here goes (in
the random oracle model):

Suppose I'm an attacker and I don't know the secret or the chaining
value.  Then, regardless of what the entropy is, I can't predict the
numbers.

Now suppose I do know the secret and the chaining value due to some
leak.  If I want to deduce prior outputs, I think I'm stuck: I'd need
to find a value "result" such that prev_chaining + result = chaining
and result = H(prev_chaining, ..., secret).  I don't think this can
be done efficiently in the random oracle model regardless of what the
"..." is.

But, if I know the secret and chaining value, I can predict the next
output assuming I can guess the entropy.  What's worse is that, even
if I can't guess the entropy, if I *observe* the next output then I
can calculate the next chaining value.

So this is probably good enough, and making it better is hard.  Changing it to:

u64 entropy = (u64)random_get_entropy() + current->pid;
result = siphash(..., entropy, ...);
secret->chaining += result + entropy;

would reduce this problem by forcing an attacker to brute-force the
entropy on each iteration, which is probably an improvement.

To fully fix it, something like "catastrophic reseeding" would be
needed, but that's hard to get right.

(An aside: on x86 at least, using two percpu variables is faster
because direct percpu access is essentially free, whereas getting
the address of a percpu variable is not free.)

^ permalink raw reply

* Re: [PATCH net-next 00/10] netcp: enhancements and minor fixes
From: Murali Karicheri @ 2016-12-21 23:50 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-omap, grygorii.strashko, mugunthanvnm, linux-kernel,
	arnd, devicetree, mark.rutland, robh+dt
In-Reply-To: <20161220.190315.439054497576758011.davem@davemloft.net>

David,

On 12/20/2016 07:03 PM, David Miller wrote:
> 
> The net-next tree is not open, do not resubmit this series until it
> is open again.
> 
> Thanks.
> 
Ok. Thanks

-- 
Murali Karicheri
Linux Kernel, Keystone

^ permalink raw reply

* Food for thought: could cgroup+bpf live in a cgroup v1-compatible controller?
From: Andy Lutomirski @ 2016-12-21 23:55 UTC (permalink / raw)
  To: Daniel Mack, Alexei Starovoitov, Mickaël Salaün,
	Kees Cook, Jann Horn, Tejun Heo, David Ahern, David S. Miller,
	Thomas Graf, Michael Kerrisk, Peter Zijlstra, Linux API,
	linux-kernel@vger.kernel.org, Network Development, John Stultz,
	Eric W. Biederman

It seems to me that all of the new cgroup+bpf hooks and all of the
proposed networking-related ones that I'm aware of look at
sock_cgroup_ptr().  I'm wondering if this could be made cgroup v1
compatible?

As far as I can tell, this could be done with no changes at all to the
networking code and only minor changes to the cgroup code.
Specifically, there would be a new "socket" controller.  Its effect
would be that cgroup_sk_alloc() would load the current socket cgroup
instead of the current default cgroup, assuming that a socket cgroup
were installed.

Would this work?  I realize that there is a moratorium on new fields in
sock (for good reasons), but this wouldn't require a new field or even
have a significant effect on the meaning of existing fields.  Instead
it would just change how the cgroup that's loaded into the existing
field is selected.

Would this be doable?  If so, would it be useful?

(If this were done, then presumably cgroup+lsm+bpf would consider
becoming a controller as well.)

--Andy

^ permalink raw reply

* Re: [PATCH] ip: vfinfo: remove code duplication for IFLA_VF_RSS_QUERY_EN
From: Stephen Hemminger @ 2016-12-22  0:05 UTC (permalink / raw)
  To: Julien Fortin; +Cc: netdev, phil
In-Reply-To: <20161216163605.19728-1-julien@cumulusnetworks.com>

On Fri, 16 Dec 2016 17:36:05 +0100
Julien Fortin <julien@cumulusnetworks.com> wrote:

> From: Julien Fortin <julien@cumulusnetworks.com>
> 
> Fixes: 4fb4a10e120b1 ("ipaddress: Print IFLA_VF_QUERY_RSS_EN setting”)
> 
> Signed-off-by: Julien Fortin <julien@cumulusnetworks.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH iproute2/net-next 0/2] tc: flower: enhance mask support
From: Stephen Hemminger @ 2016-12-22  0:09 UTC (permalink / raw)
  To: Simon Horman; +Cc: netdev
In-Reply-To: <1481896477-13497-1-git-send-email-simon.horman@netronome.com>

On Fri, 16 Dec 2016 14:54:35 +0100
Simon Horman <simon.horman@netronome.com> wrote:

> Hi,
> 
> this short series enhances mask support for tc flower by:
> * Documenting existing mask support for *_ip parameters
> * Allowing *_mac options to accept a mask.
>   This makes use of existing kernel features.
> 
> Based on net-next +
> "[PATCH iproute2 0/2] Add dest UDP port to IP tunnel parameters"
> 
> Simon Horman (2):
>   tc: flower: document that *_ip parameters take a PREFIX as an
>     argument.
>   tc: flower: Allow *_mac options to accept a mask
> 
>  man/man8/tc-flower.8 | 41 +++++++++++++++++++++++------------------
>  tc/f_flower.c        | 51 ++++++++++++++++++++++++++++++++++++++++-----------
>  2 files changed, 63 insertions(+), 29 deletions(-)
> 

Thanks, applied to net-next.
Initially applied to wrong tree then reapplied to right tree.

^ permalink raw reply

* Re: [PATCH iproute2 0/4] ip vrf fixups
From: Stephen Hemminger @ 2016-12-22  0:09 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev
In-Reply-To: <1481832422-10267-1-git-send-email-dsa@cumulusnetworks.com>

On Thu, 15 Dec 2016 12:06:58 -0800
David Ahern <dsa@cumulusnetworks.com> wrote:

> Some minor cleanups to the 'ip vrf' command.
> 
> Patch 1 moves the CGROUP_BPF hint to the failure of prog_load since it
> fails first.
> 
> Patch 2 refactors ipvrf_identify. The action part is moved to a function
> that can be used standalone and in the process flipped to fopen/fgets for
> robustness should the cgroups file grow larger than 4k.
> 
> Patch 3 fixes the path switching to "default" VRF.
> 
> Patch 4 moves a task to default VRF when switching namespaces.
> 
> David Ahern (4):
>   ip vrf: Move kernel config hint to prog_load failure
>   ip vrf: Refactor ipvrf_identify
>   ip vrf: Fix reset to default VRF
>   ip netns: Reset vrf to default VRF on namespace switch
> 
>  ip/ip_common.h |   1 +
>  ip/ipnetns.c   |   5 +++
>  ip/ipvrf.c     | 103 +++++++++++++++++++++++++++++++++++----------------------
>  3 files changed, 69 insertions(+), 40 deletions(-)
> 

Applied thanks.

^ permalink raw reply

* Re: [PATCH iproute2 0/2] Man page fixes
From: Stephen Hemminger @ 2016-12-22  0:10 UTC (permalink / raw)
  To: Roi Dayan; +Cc: netdev, Amir Vadai, Hadar Hen Zion
In-Reply-To: <1481607232-1342-1-git-send-email-roid@mellanox.com>

On Tue, 13 Dec 2016 07:33:50 +0200
Roi Dayan <roid@mellanox.com> wrote:

> Hi,
> 
> The 2 patches are man page related only.
> First fixes a typo and second adding missing man page to the Makefile.
> 
> Thanks
> 
> Roi Dayan (2):
>   tc: flower: Fix typo in the flower man page
>   tc: tunnel_key: Add tc-tunnel_key man page to Makefile
> 
>  man/man8/Makefile    | 1 +
>  man/man8/tc-flower.8 | 4 ++--
>  2 files changed, 3 insertions(+), 2 deletions(-)
> 

Applied thanks.

^ permalink raw reply

* RE: [PATCH] net: add one ethtool option to set relax ordering mode
From: maowenan @ 2016-12-22  0:10 UTC (permalink / raw)
  To: maowenan, Andrew Lunn
  Cc: netdev@vger.kernel.org, jeffrey.t.kirsher@intel.com,
	weiyongjun (A), davem@davemloft.net
In-Reply-To: <20161208141153.GI26852@lunn.ch>

Hi,

> -----Original Message-----
> From: maowenan
> Sent: Monday, December 12, 2016 4:29 PM
> To: 'Andrew Lunn'
> Cc: netdev@vger.kernel.org; jeffrey.t.kirsher@intel.com; weiyongjun (A)
> Subject: RE: [PATCH] net: add one ethtool option to set relax ordering mode
> 
> 
> 
> > -----Original Message-----
> > From: netdev-owner@vger.kernel.org
> > [mailto:netdev-owner@vger.kernel.org]
> > On Behalf Of Andrew Lunn
> > Sent: Thursday, December 08, 2016 10:12 PM
> > To: maowenan
> > Cc: netdev@vger.kernel.org; jeffrey.t.kirsher@intel.com
> > Subject: Re: [PATCH] net: add one ethtool option to set relax ordering
> > mode
> >
> > On Thu, Dec 08, 2016 at 02:51:37PM +0800, Mao Wenan wrote:
> > > This patch provides one way to set/unset IXGBE NIC TX and RX relax
> > > ordering mode, which can be set by ethtool.
> > > Relax ordering is one mode of 82599 NIC, to enable this mode can
> > > enhance the performance for some cpu architecure.
> > > example:
> > > ethtool -s enp1s0f0 relaxorder off
> > > ethtool -s enp1s0f0 relaxorder on
> >
> > Since this is a simple on/off, could it not be done with a feature?
> > ethtool --feature?
> >
> > 	Andrew
> 
> Hello Andrew,
> 	Thank you for your comments.
> 	I get your idea about using ethtool -K|--feature is good for this feature,
> right?
> My original concert is about this is a relax ordering mode exist in 82599, it is
> the hardware related feature. And ethtool -s option is related hardware of phy
> and other (e.g: speed, duplex...), it is very easy to implement in do_sset().
> But ethtool -K is mainly used for protocol offload,
>         ethtool -K|--features|--offload DEVNAME Set protocol offload and
> other features
>                 FEATURE on|off ...
> @Jeff Kirsher, what's your comments?

@Jeff Kirsher and @David Miller, do you have any other comments about this patch, which adds an ethtool option to turn relax ordering on|off on the 82599?  Thank you.
Andrew has commented above about using -K or --features to turn relax ordering on|off instead.







 

^ permalink raw reply

* Re: HalfSipHash Acceptable Usage
From: George Spelvin @ 2016-12-22  0:18 UTC (permalink / raw)
  To: linux, tytso
  Cc: ak, davem, David.Laight, djb, ebiggers3, eric.dumazet, hannes,
	Jason, jeanphilippe.aumasson, kernel-hardening, linux-crypto,
	linux-kernel, luto, netdev, tom, torvalds, vegard.nossum
In-Reply-To: <20161221222702.h2vboms776zpgpi4@thunk.org>

Theodore Ts'o wrote:
> On Wed, Dec 21, 2016 at 01:37:51PM -0500, George Spelvin wrote:
>> SipHash annihilates the competition on 64-bit superscalar hardware.
>> SipHash dominates the field on 64-bit in-order hardware.
>> SipHash wins easily on 32-bit hardware *with enough registers*.
>> On register-starved 32-bit machines, it really struggles.

> And "with enough registers" includes ARM and MIPS, right?

Yes.  As a matter of fact, 32-bit ARM does particularly well
on 64-bit SipHash due to its shift+op instructions.

There is a noticeable performance drop, but nothing catastrophic.

The main thing I've been worried about is all the flow tracking
and NAT done by small home routers, and that's addressed by using
HalfSipHash for the hash tables.  They don't *initiate* a lot of
TCP sessions.

> So the only
> real problem is 32-bit x86, and you're right, at that point, only
> people who might care are people who are using a space-radiation
> hardened 386 --- and they're not likely to be doing high throughput
> TCP connections.  :-)

The only requirement on performance is "don't make DaveM angry." :-)

I was just trying to answer the question of why we *worried* about the
performance, not specifically argue that we *should* use HalfSipHash.

^ permalink raw reply

* [PATCH] skfp: hwmtm: Use proper logging macros, correct mismatches
From: Joe Perches @ 2016-12-22  0:41 UTC (permalink / raw)
  To: Colin King, linux-kernel; +Cc: netdev

Logging macros should allow format and argument validation.
The DB_TX, DB_RX, and DB_GEN macros did not.

Update the macros and uses and add no_printk validation to the
previously compiled away #ifndef DEBUG variants.

Done with coccinelle and some typing.

Signed-off-by: Joe Perches <joe@perches.com>
---

Compile tested DEBUG and non-DEBUG only, no hardware

 drivers/net/fddi/skfp/h/hwmtm.h |  24 ++++--
 drivers/net/fddi/skfp/hwmtm.c   | 176 ++++++++++++++++++++--------------------
 2 files changed, 106 insertions(+), 94 deletions(-)

diff --git a/drivers/net/fddi/skfp/h/hwmtm.h b/drivers/net/fddi/skfp/h/hwmtm.h
index 4ca2341d7f06..123cfa09c354 100644
--- a/drivers/net/fddi/skfp/h/hwmtm.h
+++ b/drivers/net/fddi/skfp/h/hwmtm.h
@@ -168,13 +168,25 @@ struct os_debug {
 #define DB_P	debug
 #endif
 
-#define DB_RX(a,b,c,lev) if (DB_P.d_os.hwm_rx >= (lev))	printf(a,b,c)
-#define DB_TX(a,b,c,lev) if (DB_P.d_os.hwm_tx >= (lev))	printf(a,b,c)
-#define DB_GEN(a,b,c,lev) if (DB_P.d_os.hwm_gen >= (lev)) printf(a,b,c)
+#define DB_RX(lev, fmt, ...)						\
+do {									\
+	if (DB_P.d_os.hwm_rx >= (lev))					\
+		printf(fmt "\n", ##__VA_ARGS__);			\
+} while (0)
+#define DB_TX(lev, fmt, ...)						\
+do {									\
+	if (DB_P.d_os.hwm_tx >= (lev))					\
+		printf(fmt "\n", ##__VA_ARGS__);			\
+} while (0)
+#define DB_GEN(lev, fmt, ...)						\
+do {									\
+	if (DB_P.d_os.hwm_gen >= (lev))					\
+		printf(fmt "\n", ##__VA_ARGS__);			\
+} while (0)
 #else	/* DEBUG */
-#define DB_RX(a,b,c,lev)
-#define DB_TX(a,b,c,lev)
-#define DB_GEN(a,b,c,lev)
+#define DB_RX(lev, fmt, ...)	no_printk(fmt "\n", ##__VA_ARGS__)
+#define DB_TX(lev, fmt, ...)	no_printk(fmt "\n", ##__VA_ARGS__)
+#define DB_GEN(lev, fmt, ...)	no_printk(fmt "\n", ##__VA_ARGS__)
 #endif	/* DEBUG */
 
 #ifndef	SK_BREAK
diff --git a/drivers/net/fddi/skfp/hwmtm.c b/drivers/net/fddi/skfp/hwmtm.c
index d0a68bdd5f63..4937d36a9e1c 100644
--- a/drivers/net/fddi/skfp/hwmtm.c
+++ b/drivers/net/fddi/skfp/hwmtm.c
@@ -330,7 +330,7 @@ static u_long init_descr_ring(struct s_smc *smc,
 	union s_fp_descr volatile *d2 ;
 	u_long	phys ;
 
-	DB_GEN("descr ring starts at = %x ",(void *)start,0,3) ;
+	DB_GEN(3, "descr ring starts at = %p", start);
 	for (i=count-1, d1=start; i ; i--) {
 		d2 = d1 ;
 		d1++ ;		/* descr is owned by the host */
@@ -339,7 +339,7 @@ static u_long init_descr_ring(struct s_smc *smc,
 		phys = mac_drv_virt2phys(smc,(void *)d1) ;
 		d2->r.rxd_nrdadr = cpu_to_le32(phys) ;
 	}
-	DB_GEN("descr ring ends at = %x ",(void *)d1,0,3) ;
+	DB_GEN(3, "descr ring ends at = %p", d1);
 	d1->r.rxd_rbctrl = cpu_to_le32(BMU_CHECK) ;
 	d1->r.rxd_next = &start->r ;
 	phys = mac_drv_virt2phys(smc,(void *)start) ;
@@ -364,7 +364,7 @@ static void init_txd_ring(struct s_smc *smc)
 	ds = (struct s_smt_fp_txd volatile *) ((char *)smc->os.hwm.descr_p +
 		SMT_R1_RXD_COUNT*sizeof(struct s_smt_fp_rxd)) ;
 	queue = smc->hw.fp.tx[QUEUE_A0] ;
-	DB_GEN("Init async TxD ring, %d TxDs ",HWM_ASYNC_TXD_COUNT,0,3) ;
+	DB_GEN(3, "Init async TxD ring, %d TxDs", HWM_ASYNC_TXD_COUNT);
 	(void)init_descr_ring(smc,(union s_fp_descr volatile *)ds,
 		HWM_ASYNC_TXD_COUNT) ;
 	phys = le32_to_cpu(ds->txd_ntdadr) ;
@@ -378,7 +378,7 @@ static void init_txd_ring(struct s_smc *smc)
 	ds = (struct s_smt_fp_txd volatile *) ((char *)ds +
 		HWM_ASYNC_TXD_COUNT*sizeof(struct s_smt_fp_txd)) ;
 	queue = smc->hw.fp.tx[QUEUE_S] ;
-	DB_GEN("Init sync TxD ring, %d TxDs ",HWM_SYNC_TXD_COUNT,0,3) ;
+	DB_GEN(3, "Init sync TxD ring, %d TxDs", HWM_SYNC_TXD_COUNT);
 	(void)init_descr_ring(smc,(union s_fp_descr volatile *)ds,
 		HWM_SYNC_TXD_COUNT) ;
 	phys = le32_to_cpu(ds->txd_ntdadr) ;
@@ -400,7 +400,7 @@ static void init_rxd_ring(struct s_smc *smc)
 	 */
 	ds = (struct s_smt_fp_rxd volatile *) smc->os.hwm.descr_p ;
 	queue = smc->hw.fp.rx[QUEUE_R1] ;
-	DB_GEN("Init RxD ring, %d RxDs ",SMT_R1_RXD_COUNT,0,3) ;
+	DB_GEN(3, "Init RxD ring, %d RxDs", SMT_R1_RXD_COUNT);
 	(void)init_descr_ring(smc,(union s_fp_descr volatile *)ds,
 		SMT_R1_RXD_COUNT) ;
 	phys = le32_to_cpu(ds->rxd_nrdadr) ;
@@ -469,11 +469,11 @@ void init_fddi_driver(struct s_smc *smc, u_char *mac_addr)
 	 */
 	i = 16 - ((long)smc->os.hwm.descr_p & 0xf) ;
 	if (i != 16) {
-		DB_GEN("i = %d",i,0,3) ;
+		DB_GEN(3, "i = %d", i);
 		smc->os.hwm.descr_p = (union s_fp_descr volatile *)
 			((char *)smc->os.hwm.descr_p+i) ;
 	}
-	DB_GEN("pt to descr area = %x",(void *)smc->os.hwm.descr_p,0,3) ;
+	DB_GEN(3, "pt to descr area = %p", smc->os.hwm.descr_p);
 
 	init_txd_ring(smc) ;
 	init_rxd_ring(smc) ;
@@ -501,7 +501,7 @@ SMbuf *smt_get_mbuf(struct s_smc *smc)
 		mb->sm_off = 8 ;
 		mb->sm_use_count = 1 ;
 	}
-	DB_GEN("get SMbuf: mb = %x",(void *)mb,0,3) ;
+	DB_GEN(3, "get SMbuf: mb = %p", mb);
 	return mb;	/* May be NULL */
 }
 
@@ -510,14 +510,14 @@ void smt_free_mbuf(struct s_smc *smc, SMbuf *mb)
 
 	if (mb) {
 		mb->sm_use_count-- ;
-		DB_GEN("free_mbuf: sm_use_count = %d",mb->sm_use_count,0,3) ;
+		DB_GEN(3, "free_mbuf: sm_use_count = %d", mb->sm_use_count);
 		/*
 		 * If the use_count is != zero the MBuf is queued
 		 * more than once and must not queued into the
 		 * free MBuf queue
 		 */
 		if (!mb->sm_use_count) {
-			DB_GEN("free SMbuf: mb = %x",(void *)mb,0,3) ;
+			DB_GEN(3, "free SMbuf: mb = %p", mb);
 #ifndef	COMMON_MB_POOL
 			mb->sm_next = smc->os.hwm.mbuf_pool.mb_free ;
 			smc->os.hwm.mbuf_pool.mb_free = mb ;
@@ -741,7 +741,7 @@ void fddi_isr(struct s_smc *smc)
 
 	while ((is = GET_ISR() & ISR_MASK)) {
 		NDD_TRACE("CH0B",is,0,0) ;
-		DB_GEN("ISA = 0x%x",is,0,7) ;
+		DB_GEN(7, "ISA = 0x%lx", is);
 
 		if (is & IMASK_SLOW) {
 			NDD_TRACE("CH1b",is,0,0) ;
@@ -754,20 +754,20 @@ void fddi_isr(struct s_smc *smc)
 			if (is & IS_MINTR1) {	/* FORMAC+ STU1(U/L) */
 				stu = inpw(FM_A(FM_ST1U)) ;
 				stl = inpw(FM_A(FM_ST1L)) ;
-				DB_GEN("Slow transmit complete",0,0,6) ;
+				DB_GEN(6, "Slow transmit complete");
 				mac1_irq(smc,stu,stl) ;
 			}
 			if (is & IS_MINTR2) {	/* FORMAC+ STU2(U/L) */
 				stu= inpw(FM_A(FM_ST2U)) ;
 				stl= inpw(FM_A(FM_ST2L)) ;
-				DB_GEN("Slow receive complete",0,0,6) ;
-				DB_GEN("stl = %x : stu = %x",stl,stu,7) ;
+				DB_GEN(6, "Slow receive complete");
+				DB_GEN(7, "stl = %x : stu = %x", stl, stu);
 				mac2_irq(smc,stu,stl) ;
 			}
 			if (is & IS_MINTR3) {	/* FORMAC+ STU3(U/L) */
 				stu= inpw(FM_A(FM_ST3U)) ;
 				stl= inpw(FM_A(FM_ST3L)) ;
-				DB_GEN("FORMAC Mode Register 3",0,0,6) ;
+				DB_GEN(6, "FORMAC Mode Register 3");
 				mac3_irq(smc,stu,stl) ;
 			}
 			if (is & IS_TIMINT) {	/* Timer 82C54-2 */
@@ -814,7 +814,7 @@ void fddi_isr(struct s_smc *smc)
 		 *	Fast Tx complete Async/Sync Queue (BMU service)
 		 */
 		if (is & (IS_XS_F|IS_XA_F)) {
-			DB_GEN("Fast tx complete queue",0,0,6) ;
+			DB_GEN(6, "Fast tx complete queue");
 			/*
 			 * clear IRQ, Note: no IRQ is lost, because
 			 * 	we always service both queues
@@ -829,7 +829,7 @@ void fddi_isr(struct s_smc *smc)
 		 *	Fast Rx Complete (BMU service)
 		 */
 		if (is & IS_R1_F) {
-			DB_GEN("Fast receive complete",0,0,6) ;
+			DB_GEN(6, "Fast receive complete");
 			/* clear IRQ */
 #ifndef USE_BREAK_ISR
 			outpd(ADDR(B4_R1_CSR),CSR_IRQ_CL_F) ;
@@ -1083,13 +1083,13 @@ void process_receive(struct s_smc *smc)
 #endif
 		n = 0 ;
 		do {
-			DB_RX("Check RxD %x for OWN and EOF",(void *)r,0,5) ;
+			DB_RX(5, "Check RxD %p for OWN and EOF", r);
 			DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORCPU) ;
 			rbctrl = le32_to_cpu(CR_READ(r->rxd_rbctrl));
 
 			if (rbctrl & BMU_OWN) {
 				NDD_TRACE("RHxE",r,rfsw,rbctrl) ;
-				DB_RX("End of RxDs",0,0,4) ;
+				DB_RX(4, "End of RxDs");
 				goto rx_end ;
 			}
 			/*
@@ -1136,19 +1136,19 @@ void process_receive(struct s_smc *smc)
 			rx_used-- ;
 		} while (!(rbctrl & BMU_EOF)) ;
 		used_frags = frag_count ;
-		DB_RX("EOF set in RxD, used_frags = %d ",used_frags,0,5) ;
+		DB_RX(5, "EOF set in RxD, used_frags = %d", used_frags);
 
 		/* may be next 2 DRV_BUF_FLUSH() can be skipped, because */
 		/* BMU_ST_BUF will not be changed by the ASIC */
 		DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORCPU) ;
 		while (rx_used && !(r->rxd_rbctrl & cpu_to_le32(BMU_ST_BUF))) {
-			DB_RX("Check STF bit in %x",(void *)r,0,5) ;
+			DB_RX(5, "Check STF bit in %p", r);
 			r = r->rxd_next ;
 			DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORCPU) ;
 			frag_count++ ;
 			rx_used-- ;
 		}
-		DB_RX("STF bit found",0,0,5) ;
+		DB_RX(5, "STF bit found");
 
 		/*
 		 * The received frame is finished for the process receive
@@ -1164,7 +1164,7 @@ void process_receive(struct s_smc *smc)
 		rxd->rxd_rbctrl &= cpu_to_le32(~BMU_STF) ;
 
 		for (r=rxd, i=frag_count ; i ; r=r->rxd_next, i--){
-			DB_RX("dma_complete for RxD %x",(void *)r,0,5) ;
+			DB_RX(5, "dma_complete for RxD %p", r);
 			dma_complete(smc,(union s_fp_descr volatile *)r,DMA_WR);
 		}
 		smc->hw.fp.err_stats.err_valid++ ;
@@ -1173,34 +1173,34 @@ void process_receive(struct s_smc *smc)
 		/* the length of the data including the FC */
 		len = (rfsw & RD_LENGTH) - 4 ;
 
-		DB_RX("frame length = %d",len,0,4) ;
+		DB_RX(4, "frame length = %d", len);
 		/*
 		 * check the frame_length and all error flags
 		 */
 		if (rfsw & (RX_MSRABT|RX_FS_E|RX_FS_CRC|RX_FS_IMPL)){
 			if (rfsw & RD_S_MSRABT) {
-				DB_RX("Frame aborted by the FORMAC",0,0,2) ;
+				DB_RX(2, "Frame aborted by the FORMAC");
 				smc->hw.fp.err_stats.err_abort++ ;
 			}
 			/*
 			 * check frame status
 			 */
 			if (rfsw & RD_S_SEAC2) {
-				DB_RX("E-Indicator set",0,0,2) ;
+				DB_RX(2, "E-Indicator set");
 				smc->hw.fp.err_stats.err_e_indicator++ ;
 			}
 			if (rfsw & RD_S_SFRMERR) {
-				DB_RX("CRC error",0,0,2) ;
+				DB_RX(2, "CRC error");
 				smc->hw.fp.err_stats.err_crc++ ;
 			}
 			if (rfsw & RX_FS_IMPL) {
-				DB_RX("Implementer frame",0,0,2) ;
+				DB_RX(2, "Implementer frame");
 				smc->hw.fp.err_stats.err_imp_frame++ ;
 			}
 			goto abort_frame ;
 		}
 		if (len > FDDI_RAW_MTU-4) {
-			DB_RX("Frame too long error",0,0,2) ;
+			DB_RX(2, "Frame too long error");
 			smc->hw.fp.err_stats.err_too_long++ ;
 			goto abort_frame ;
 		}
@@ -1209,12 +1209,12 @@ void process_receive(struct s_smc *smc)
 		 * of aborded frames to the BMU
 		 */
 		if (len <= 4) {
-			DB_RX("Frame length = 0",0,0,2) ;
+			DB_RX(2, "Frame length = 0");
 			goto abort_frame ;
 		}
 
 		if (len != (n-4)) {
-			DB_RX("BMU: rx len differs: [%d:%d]",len,n,4);
+			DB_RX(4, "BMU: rx len differs: [%d:%d]", len, n);
 			smc->os.hwm.rx_len_error++ ;
 			goto abort_frame ;
 		}
@@ -1223,7 +1223,7 @@ void process_receive(struct s_smc *smc)
 		 * Check SA == MA
 		 */
 		virt = (u_char far *) rxd->rxd_virt ;
-		DB_RX("FC = %x",*virt,0,2) ;
+		DB_RX(2, "FC = %x", *virt);
 		if (virt[12] == MA[5] &&
 		    virt[11] == MA[4] &&
 		    virt[10] == MA[3] &&
@@ -1250,7 +1250,7 @@ void process_receive(struct s_smc *smc)
 					    virt[3] != MA[2] ||
 					    virt[2] != MA[1] ||
 					    virt[1] != MA[0]) {
-						DB_RX("DA != MA and not multi- or broadcast",0,0,2) ;
+						DB_RX(2, "DA != MA and not multi- or broadcast");
 						goto abort_frame ;
 					}
 				}
@@ -1259,13 +1259,13 @@ void process_receive(struct s_smc *smc)
 			/*
 			 * LLC frame received
 			 */
-			DB_RX("LLC - receive",0,0,4) ;
+			DB_RX(4, "LLC - receive");
 			mac_drv_rx_complete(smc,rxd,frag_count,len) ;
 		}
 		else {
 			if (!(mb = smt_get_mbuf(smc))) {
 				smc->hw.fp.err_stats.err_no_buf++ ;
-				DB_RX("No SMbuf; receive terminated",0,0,4) ;
+				DB_RX(4, "No SMbuf; receive terminated");
 				goto abort_frame ;
 			}
 			data = smtod(mb,char *) - 1 ;
@@ -1278,7 +1278,7 @@ void process_receive(struct s_smc *smc)
 #else
 			for (r=rxd, i=used_frags ; i ; r=r->rxd_next, i--){
 				n = le32_to_cpu(r->rxd_rbctrl) & RD_LENGTH ;
-				DB_RX("cp SMT frame to mb: len = %d",n,0,6) ;
+				DB_RX(6, "cp SMT frame to mb: len = %d", n);
 				memcpy(data,r->rxd_virt,n) ;
 				data += n ;
 			}
@@ -1294,15 +1294,15 @@ void process_receive(struct s_smc *smc)
 			switch(fc) {
 			case FC_SMT_INFO :
 				smc->hw.fp.err_stats.err_smt_frame++ ;
-				DB_RX("SMT frame received ",0,0,5) ;
+				DB_RX(5, "SMT frame received");
 
 				if (smc->os.hwm.pass_SMT) {
-					DB_RX("pass SMT frame ",0,0,5) ;
+					DB_RX(5, "pass SMT frame");
 					mac_drv_rx_complete(smc, rxd,
 						frag_count,len) ;
 				}
 				else {
-					DB_RX("requeue RxD",0,0,5) ;
+					DB_RX(5, "requeue RxD");
 					mac_drv_requeue_rxd(smc,rxd,frag_count);
 				}
 
@@ -1310,7 +1310,7 @@ void process_receive(struct s_smc *smc)
 				break ;
 			case FC_SMT_NSA :
 				smc->hw.fp.err_stats.err_smt_frame++ ;
-				DB_RX("SMT frame received ",0,0,5) ;
+				DB_RX(5, "SMT frame received");
 
 				/* if pass_NSA set pass the NSA frame or */
 				/* pass_SMT set and the A-Indicator */
@@ -1318,12 +1318,12 @@ void process_receive(struct s_smc *smc)
 				if (smc->os.hwm.pass_NSA ||
 					(smc->os.hwm.pass_SMT &&
 					!(rfsw & A_INDIC))) {
-					DB_RX("pass SMT frame ",0,0,5) ;
+					DB_RX(5, "pass SMT frame");
 					mac_drv_rx_complete(smc, rxd,
 						frag_count,len) ;
 				}
 				else {
-					DB_RX("requeue RxD",0,0,5) ;
+					DB_RX(5, "requeue RxD");
 					mac_drv_requeue_rxd(smc,rxd,frag_count);
 				}
 
@@ -1331,12 +1331,12 @@ void process_receive(struct s_smc *smc)
 				break ;
 			case FC_BEACON :
 				if (smc->os.hwm.pass_DB) {
-					DB_RX("pass DB frame ",0,0,5) ;
+					DB_RX(5, "pass DB frame");
 					mac_drv_rx_complete(smc, rxd,
 						frag_count,len) ;
 				}
 				else {
-					DB_RX("requeue RxD",0,0,5) ;
+					DB_RX(5, "requeue RxD");
 					mac_drv_requeue_rxd(smc,rxd,frag_count);
 				}
 				smt_free_mbuf(smc,mb) ;
@@ -1345,9 +1345,9 @@ void process_receive(struct s_smc *smc)
 				/*
 				 * unknown FC abord the frame
 				 */
-				DB_RX("unknown FC error",0,0,2) ;
+				DB_RX(2, "unknown FC error");
 				smt_free_mbuf(smc,mb) ;
-				DB_RX("requeue RxD",0,0,5) ;
+				DB_RX(5, "requeue RxD");
 				mac_drv_requeue_rxd(smc,rxd,frag_count) ;
 				if ((fc & 0xf0) == FC_MAC)
 					smc->hw.fp.err_stats.err_mac_frame++ ;
@@ -1358,16 +1358,16 @@ void process_receive(struct s_smc *smc)
 			}
 		}
 
-		DB_RX("next RxD is %x ",queue->rx_curr_get,0,3) ;
+		DB_RX(3, "next RxD is %p", queue->rx_curr_get);
 		NDD_TRACE("RHx1",queue->rx_curr_get,0,0) ;
 
 		continue ;
 	/*--------------------------------------------------------------------*/
 abort_frame:
-		DB_RX("requeue RxD",0,0,5) ;
+		DB_RX(5, "requeue RxD");
 		mac_drv_requeue_rxd(smc,rxd,frag_count) ;
 
-		DB_RX("next RxD is %x ",queue->rx_curr_get,0,3) ;
+		DB_RX(3, "next RxD is %p", queue->rx_curr_get);
 		NDD_TRACE("RHx2",queue->rx_curr_get,0,0) ;
 	}
 rx_end:
@@ -1381,7 +1381,7 @@ static void smt_to_llc(struct s_smc *smc, SMbuf *mb)
 {
 	u_char	fc ;
 
-	DB_RX("send a queued frame to the llc layer",0,0,4) ;
+	DB_RX(4, "send a queued frame to the llc layer");
 	smc->os.hwm.r.len = mb->sm_len ;
 	smc->os.hwm.r.mb_pos = smtod(mb,char *) ;
 	fc = *smc->os.hwm.r.mb_pos ;
@@ -1419,7 +1419,7 @@ void hwm_rx_frag(struct s_smc *smc, char far *virt, u_long phys, int len,
 	__le32	rbctrl;
 
 	NDD_TRACE("RHfB",virt,len,frame_status) ;
-	DB_RX("hwm_rx_frag: len = %d, frame_status = %x\n",len,frame_status,2) ;
+	DB_RX(2, "hwm_rx_frag: len = %d, frame_status = %x", len, frame_status);
 	r = smc->hw.fp.rx_q[QUEUE_R1].rx_curr_put ;
 	r->rxd_virt = virt ;
 	r->rxd_rbadr = cpu_to_le32(phys) ;
@@ -1475,7 +1475,7 @@ void mac_drv_clear_rx_queue(struct s_smc *smc)
 	}
 
 	queue = smc->hw.fp.rx[QUEUE_R1] ;
-	DB_RX("clear_rx_queue",0,0,5) ;
+	DB_RX(5, "clear_rx_queue");
 
 	/*
 	 * dma_complete and mac_drv_clear_rxd for all RxDs / receive buffers
@@ -1483,7 +1483,7 @@ void mac_drv_clear_rx_queue(struct s_smc *smc)
 	r = queue->rx_curr_get ;
 	while (queue->rx_used) {
 		DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORCPU) ;
-		DB_RX("switch OWN bit of RxD 0x%p ",r,0,5) ;
+		DB_RX(5, "switch OWN bit of RxD 0x%p", r);
 		r->rxd_rbctrl &= ~cpu_to_le32(BMU_OWN) ;
 		frag_count = 1 ;
 		DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORDEV) ;
@@ -1491,23 +1491,23 @@ void mac_drv_clear_rx_queue(struct s_smc *smc)
 		DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORCPU) ;
 		while (r != queue->rx_curr_put &&
 			!(r->rxd_rbctrl & cpu_to_le32(BMU_ST_BUF))) {
-			DB_RX("Check STF bit in %x",(void *)r,0,5) ;
+			DB_RX(5, "Check STF bit in %p", r);
 			r->rxd_rbctrl &= ~cpu_to_le32(BMU_OWN) ;
 			DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORDEV) ;
 			r = r->rxd_next ;
 			DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORCPU) ;
 			frag_count++ ;
 		}
-		DB_RX("STF bit found",0,0,5) ;
+		DB_RX(5, "STF bit found");
 		next_rxd = r ;
 
 		for (r=queue->rx_curr_get,i=frag_count; i ; r=r->rxd_next,i--){
-			DB_RX("dma_complete for RxD %x",(void *)r,0,5) ;
+			DB_RX(5, "dma_complete for RxD %p", r);
 			dma_complete(smc,(union s_fp_descr volatile *)r,DMA_WR);
 		}
 
-		DB_RX("mac_drv_clear_rxd: RxD %x frag_count %d ",
-			(void *)queue->rx_curr_get,frag_count,5) ;
+		DB_RX(5, "mac_drv_clear_rxd: RxD %p frag_count %d",
+		      queue->rx_curr_get, frag_count);
 		mac_drv_clear_rxd(smc,queue->rx_curr_get,frag_count) ;
 
 		queue->rx_curr_get = next_rxd ;
@@ -1554,7 +1554,7 @@ int hwm_tx_init(struct s_smc *smc, u_char fc, int frag_count, int frame_len,
 	smc->os.hwm.tx_p = smc->hw.fp.tx[frame_status & QUEUE_A0] ;
 	smc->os.hwm.tx_descr = TX_DESCRIPTOR | (((u_long)(frame_len-1)&3)<<27) ;
 	smc->os.hwm.tx_len = frame_len ;
-	DB_TX("hwm_tx_init: fc = %x, len = %d",fc,frame_len,3) ;
+	DB_TX(3, "hwm_tx_init: fc = %x, len = %d", fc, frame_len);
 	if ((fc & ~(FC_SYNC_BIT|FC_LLC_PRIOR)) == FC_ASYNC_LLC) {
 		frame_status |= LAN_TX ;
 	}
@@ -1577,23 +1577,23 @@ int hwm_tx_init(struct s_smc *smc, u_char fc, int frag_count, int frame_len,
 	if (!smc->hw.mac_ring_is_up) {
 		frame_status &= ~LAN_TX ;
 		frame_status |= RING_DOWN ;
-		DB_TX("Ring is down: terminate LAN_TX",0,0,2) ;
+		DB_TX(2, "Ring is down: terminate LAN_TX");
 	}
 	if (frag_count > smc->os.hwm.tx_p->tx_free) {
 #ifndef	NDIS_OS2
 		mac_drv_clear_txd(smc) ;
 		if (frag_count > smc->os.hwm.tx_p->tx_free) {
-			DB_TX("Out of TxDs, terminate LAN_TX",0,0,2) ;
+			DB_TX(2, "Out of TxDs, terminate LAN_TX");
 			frame_status &= ~LAN_TX ;
 			frame_status |= OUT_OF_TXD ;
 		}
 #else
-		DB_TX("Out of TxDs, terminate LAN_TX",0,0,2) ;
+		DB_TX(2, "Out of TxDs, terminate LAN_TX");
 		frame_status &= ~LAN_TX ;
 		frame_status |= OUT_OF_TXD ;
 #endif
 	}
-	DB_TX("frame_status = %x",frame_status,0,3) ;
+	DB_TX(3, "frame_status = %x", frame_status);
 	NDD_TRACE("THiE",frame_status,smc->os.hwm.tx_p->tx_free,0) ;
 	return frame_status;
 }
@@ -1642,10 +1642,10 @@ void hwm_tx_frag(struct s_smc *smc, char far *virt, u_long phys, int len,
 	 */
 	t = queue->tx_curr_put ;
 
-	DB_TX("hwm_tx_frag: len = %d, frame_status = %x ",len,frame_status,2) ;
+	DB_TX(2, "hwm_tx_frag: len = %d, frame_status = %x", len, frame_status);
 	if (frame_status & LAN_TX) {
 		/* '*t' is already defined */
-		DB_TX("LAN_TX: TxD = %p, virt = %p ",t,virt,3) ;
+		DB_TX(3, "LAN_TX: TxD = %p, virt = %p", t, virt);
 		t->txd_virt = virt ;
 		t->txd_txdscr = cpu_to_le32(smc->os.hwm.tx_descr) ;
 		t->txd_tbadr = cpu_to_le32(phys) ;
@@ -1674,11 +1674,11 @@ void hwm_tx_frag(struct s_smc *smc, char far *virt, u_long phys, int len,
 		}
 	}
 	if (frame_status & LOC_TX) {
-		DB_TX("LOC_TX: ",0,0,3) ;
+		DB_TX(3, "LOC_TX:");
 		if (frame_status & FIRST_FRAG) {
 			if(!(smc->os.hwm.tx_mb = smt_get_mbuf(smc))) {
 				smc->hw.fp.err_stats.err_no_buf++ ;
-				DB_TX("No SMbuf; transmit terminated",0,0,4) ;
+				DB_TX(4, "No SMbuf; transmit terminated");
 			}
 			else {
 				smc->os.hwm.tx_data =
@@ -1693,7 +1693,7 @@ void hwm_tx_frag(struct s_smc *smc, char far *virt, u_long phys, int len,
 		}
 		if (smc->os.hwm.tx_mb) {
 #ifndef	USE_OS_CPY
-			DB_TX("copy fragment into MBuf ",0,0,3) ;
+			DB_TX(3, "copy fragment into MBuf");
 			memcpy(smc->os.hwm.tx_data,virt,len) ;
 			smc->os.hwm.tx_data += len ;
 #endif
@@ -1718,7 +1718,7 @@ void hwm_tx_frag(struct s_smc *smc, char far *virt, u_long phys, int len,
 				smc->os.hwm.tx_data++ ;
 				smc->os.hwm.tx_mb->sm_len =
 					smc->os.hwm.tx_len - 1 ;
-				DB_TX("pass LLC frame to SMT ",0,0,3) ;
+				DB_TX(3, "pass LLC frame to SMT");
 				smt_received_pack(smc,smc->os.hwm.tx_mb,
 						RD_FS_LOCAL) ;
 			}
@@ -1733,7 +1733,7 @@ void hwm_tx_frag(struct s_smc *smc, char far *virt, u_long phys, int len,
  */
 static void queue_llc_rx(struct s_smc *smc, SMbuf *mb)
 {
-	DB_GEN("queue_llc_rx: mb = %x",(void *)mb,0,4) ;
+	DB_GEN(4, "queue_llc_rx: mb = %p", mb);
 	smc->os.hwm.queued_rx_frames++ ;
 	mb->sm_next = (SMbuf *)NULL ;
 	if (smc->os.hwm.llc_rx_pipe == NULL) {
@@ -1763,7 +1763,7 @@ static SMbuf *get_llc_rx(struct s_smc *smc)
 		smc->os.hwm.queued_rx_frames-- ;
 		smc->os.hwm.llc_rx_pipe = mb->sm_next ;
 	}
-	DB_GEN("get_llc_rx: mb = 0x%x",(void *)mb,0,4) ;
+	DB_GEN(4, "get_llc_rx: mb = 0x%p", mb);
 	return mb;
 }
 
@@ -1773,7 +1773,7 @@ static SMbuf *get_llc_rx(struct s_smc *smc)
  */
 static void queue_txd_mb(struct s_smc *smc, SMbuf *mb)
 {
-	DB_GEN("_rx: queue_txd_mb = %x",(void *)mb,0,4) ;
+	DB_GEN(4, "_rx: queue_txd_mb = %p", mb);
 	smc->os.hwm.queued_txd_mb++ ;
 	mb->sm_next = (SMbuf *)NULL ;
 	if (smc->os.hwm.txd_tx_pipe == NULL) {
@@ -1796,7 +1796,7 @@ static SMbuf *get_txd_mb(struct s_smc *smc)
 		smc->os.hwm.queued_txd_mb-- ;
 		smc->os.hwm.txd_tx_pipe = mb->sm_next ;
 	}
-	DB_GEN("get_txd_mb: mb = 0x%x",(void *)mb,0,4) ;
+	DB_GEN(4, "get_txd_mb: mb = 0x%p", mb);
 	return mb;
 }
 
@@ -1819,7 +1819,7 @@ void smt_send_mbuf(struct s_smc *smc, SMbuf *mb, int fc)
 	__le32	tbctrl;
 
 	NDD_TRACE("THSB",mb,fc,0) ;
-	DB_TX("smt_send_mbuf: mb = 0x%p, fc = 0x%x",mb,fc,4) ;
+	DB_TX(4, "smt_send_mbuf: mb = 0x%p, fc = 0x%x", mb, fc);
 
 	mb->sm_off-- ;	/* set to fc */
 	mb->sm_len++ ;	/* + fc */
@@ -1838,7 +1838,7 @@ void smt_send_mbuf(struct s_smc *smc, SMbuf *mb, int fc)
 		if (n >= len) {
 			n = len ;
 		}
-		DB_TX("frag: virt/len = 0x%x/%d ",(void *)data,n,5) ;
+		DB_TX(5, "frag: virt/len = 0x%p/%d", data, n);
 		virt[frag_count] = data ;
 		frag_len[frag_count] = n ;
 		frag_count++ ;
@@ -1863,15 +1863,15 @@ void smt_send_mbuf(struct s_smc *smc, SMbuf *mb, int fc)
 	if (!smc->hw.mac_ring_is_up || frag_count > queue->tx_free) {
 		frame_status &= ~LAN_TX;
 		if (frame_status) {
-			DB_TX("Ring is down: terminate LAN_TX",0,0,2) ;
+			DB_TX(2, "Ring is down: terminate LAN_TX");
 		}
 		else {
-			DB_TX("Ring is down: terminate transmission",0,0,2) ;
+			DB_TX(2, "Ring is down: terminate transmission");
 			smt_free_mbuf(smc,mb) ;
 			return ;
 		}
 	}
-	DB_TX("frame_status = 0x%x ",frame_status,0,5) ;
+	DB_TX(5, "frame_status = 0x%x", frame_status);
 
 	if ((frame_status & LAN_TX) && (frame_status & LOC_TX)) {
 		mb->sm_use_count = 2 ;
@@ -1881,7 +1881,7 @@ void smt_send_mbuf(struct s_smc *smc, SMbuf *mb, int fc)
 		t = queue->tx_curr_put ;
 		frame_status |= FIRST_FRAG ;
 		for (i = 0; i < frag_count; i++) {
-			DB_TX("init TxD = 0x%x",(void *)t,0,5) ;
+			DB_TX(5, "init TxD = 0x%p", t);
 			if (i == frag_count-1) {
 				frame_status |= LAST_FRAG ;
 				t->txd_txdscr = cpu_to_le32(TX_DESCRIPTOR |
@@ -1912,7 +1912,7 @@ void smt_send_mbuf(struct s_smc *smc, SMbuf *mb, int fc)
 	}
 
 	if (frame_status & LOC_TX) {
-		DB_TX("pass Mbuf to LLC queue",0,0,5) ;
+		DB_TX(5, "pass Mbuf to LLC queue");
 		queue_llc_rx(smc,mb) ;
 	}
 
@@ -1953,18 +1953,18 @@ static void mac_drv_clear_txd(struct s_smc *smc)
 	for (i = QUEUE_S; i <= QUEUE_A0; i++) {
 		queue = smc->hw.fp.tx[i] ;
 		t1 = queue->tx_curr_get ;
-		DB_TX("clear_txd: QUEUE = %d (0=sync/1=async)",i,0,5) ;
+		DB_TX(5, "clear_txd: QUEUE = %d (0=sync/1=async)", i);
 
 		for ( ; ; ) {
 			frag_count = 0 ;
 
 			do {
 				DRV_BUF_FLUSH(t1,DDI_DMA_SYNC_FORCPU) ;
-				DB_TX("check OWN/EOF bit of TxD 0x%p",t1,0,5) ;
+				DB_TX(5, "check OWN/EOF bit of TxD 0x%p", t1);
 				tbctrl = le32_to_cpu(CR_READ(t1->txd_tbctrl));
 
 				if (tbctrl & BMU_OWN || !queue->tx_used){
-					DB_TX("End of TxDs queue %d",i,0,4) ;
+					DB_TX(4, "End of TxDs queue %d", i);
 					goto free_next_queue ;	/* next queue */
 				}
 				t1 = t1->txd_next ;
@@ -1988,11 +1988,11 @@ static void mac_drv_clear_txd(struct s_smc *smc)
 			}
 			else {
 #ifndef PASS_1ST_TXD_2_TX_COMP
-				DB_TX("mac_drv_tx_comp for TxD 0x%p",t2,0,4) ;
+				DB_TX(4, "mac_drv_tx_comp for TxD 0x%p", t2);
 				mac_drv_tx_complete(smc,t2) ;
 #else
-				DB_TX("mac_drv_tx_comp for TxD 0x%x",
-					queue->tx_curr_get,0,4) ;
+				DB_TX(4, "mac_drv_tx_comp for TxD 0x%x",
+				      queue->tx_curr_get);
 				mac_drv_tx_complete(smc,queue->tx_curr_get) ;
 #endif
 			}
@@ -2043,7 +2043,7 @@ void mac_drv_clear_tx_queue(struct s_smc *smc)
 
 	for (i = QUEUE_S; i <= QUEUE_A0; i++) {
 		queue = smc->hw.fp.tx[i] ;
-		DB_TX("clear_tx_queue: QUEUE = %d (0=sync/1=async)",i,0,5) ;
+		DB_TX(5, "clear_tx_queue: QUEUE = %d (0=sync/1=async)", i);
 
 		/*
 		 * switch the OWN bit of all pending frames to the host
@@ -2052,7 +2052,7 @@ void mac_drv_clear_tx_queue(struct s_smc *smc)
 		tx_used = queue->tx_used ;
 		while (tx_used) {
 			DRV_BUF_FLUSH(t,DDI_DMA_SYNC_FORCPU) ;
-			DB_TX("switch OWN bit of TxD 0x%p ",t,0,5) ;
+			DB_TX(5, "switch OWN bit of TxD 0x%p", t);
 			t->txd_tbctrl &= ~cpu_to_le32(BMU_OWN) ;
 			DRV_BUF_FLUSH(t,DDI_DMA_SYNC_FORDEV) ;
 			t = t->txd_next ;
-- 
2.10.0.rc2.1.g053435c

^ permalink raw reply related

* Re: [PATCH v7 6/6] siphash: implement HalfSipHash1-3 for hash tables
From: Andi Kleen @ 2016-12-22  0:46 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, edumazet, Linus Torvalds,
	Eric Biggers, Tom Herbert, davem, luto, Jean-Philippe Aumasson
In-Reply-To: <20161221230216.25341-7-Jason@zx2c4.com>

> 64-bit x86_64:
> [    0.509409] test_siphash:     SipHash2-4 cycles: 4049181
> [    0.510650] test_siphash:     SipHash1-3 cycles: 2512884
> [    0.512205] test_siphash: HalfSipHash1-3 cycles: 3429920
> [    0.512904] test_siphash:    JenkinsHash cycles:  978267

I'm not sure what these numbers mean. Surely a single siphash2-4
does not take 4+ million cycles? 

If you run them in a loop please divide by the iterations.

But generally running small code in a loop is often an unrealistic
benchmark strategy because it hides cache misses, primes
predictors, changes frequencies and changes memory costs,
but also can overload pipelines and oversubscribe
resources.

[see also page 46+ in http://halobates.de/applicative-mental-models.pdf]

So the numbers you get there are at least somewhat
dubious. It would be good to have at least some test which
is not just a tiny micro benchmark to compare before making
conclusions.

-Andi

^ permalink raw reply

* [PATCH net 1/1] tipc: revert use of copy_from_iter_full()
From: Jon Maloy @ 2016-12-22  1:01 UTC (permalink / raw)
  To: davem
  Cc: netdev, Al Viro, parthasarathy.bhuvaragan, ying.xue, maloy,
	tipc-discussion, Jon Maloy

commit cbbd26b8b1a6 ("[iov_iter] new primitives - copy_from_iter_full()
and friends") replaced calls to copy_from_iter() in the function
tipc_msg_build(). This causes an immediate crash as follows:

[ 1209.597076] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
[ 1209.607025] IP: copy_from_iter_full+0x43/0x290
[ 1209.611617] PGD 130f63067
[ 1209.611621] PUD 130f64067
[ 1209.614437] PMD 0
[ 1209.616966]
[ 1209.620351] Oops: 0000 [#1] SMP
[ 1209.622739] Modules linked in: tipc(E) ip6_udp_tunnel udp_tunnel rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat grace fscache nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack xt_tcpudp iptable_filter ip_tables x_tables ppdev joydev serio_raw parport_pc parport i2c_piix4 sunrpc autofs4 floppy psmouse pata_acpi [last unloaded: tipc]
[ 1209.643115] CPU: 7 PID: 1911 Comm: tipcTC Tainted: G            E   4.9.0+ #619
[ 1209.647707] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 1209.653393] task: ffff96c2f0c58000 task.stack: ffffb7cdc07a8000
[ 1209.656626] RIP: 0010:copy_from_iter_full+0x43/0x290
[ 1209.659412] RSP: 0018:ffffb7cdc07abc38 EFLAGS: 00010246
[ 1209.662459] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000018
[ 1209.666443] RDX: ffffb7cdc07abe00 RSI: 0000000000000000 RDI: ffff96c2f39e86b8
[ 1209.669918] RBP: ffffb7cdc07abc78 R08: 0000000000000000 R09: 0000000000000000
[ 1209.673379] R10: ffff96c2f50032c0 R11: ffff96c2f39e8600 R12: ffffb7cdc07abe00
[ 1209.676585] R13: 0000000000000000 R14: ffff96c2f39e86b8 R15: 0000000000000000
[ 1209.678573] FS:  00007fb5db2a3700(0000) GS:ffff96c2f9dc0000(0000) knlGS:0000000000000000
[ 1209.681433] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1209.684321] CR2: 0000000000000008 CR3: 0000000130f66000 CR4: 00000000000406e0
[ 1209.687676] Call Trace:
[ 1209.689257]  tipc_msg_build+0xe1/0x590 [tipc]
[ 1209.691479]  ? _raw_spin_unlock_bh+0x1e/0x20
[ 1209.694641]  ? tipc_node_find+0x30/0xa0 [tipc]
[ 1209.696789]  __tipc_sendmsg+0x189/0x480 [tipc]
[ 1209.699017]  ? remove_wait_queue+0x4d/0x60
[ 1209.700354]  tipc_connect+0x15f/0x1b0 [tipc]
[ 1209.701684]  SYSC_connect+0xd9/0x110
[ 1209.702847]  ? sock_alloc_file+0xa6/0x130
[ 1209.704083]  ? fd_install+0x25/0x30
[ 1209.705195]  ? sock_map_fd+0x44/0x70
[ 1209.706331]  SyS_connect+0xe/0x10
[ 1209.707385]  entry_SYSCALL_64_fastpath+0x1e/0xad
[ 1209.708714] RIP: 0033:0x7fb5dadca870
[ 1209.709700] RSP: 002b:00007fff6ea43978 EFLAGS: 00000246 ORIG_RAX: 000000000000002a
[ 1209.711948] RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007fb5dadca870
[ 1209.713804] RDX: 0000000000000010 RSI: 00007fff6ea43990 RDI: 0000000000000003
[ 1209.715413] RBP: 00000000012e8010 R08: 0000000000008004 R09: 0000000000000009
[ 1209.716440] R10: 000000000000012a R11: 0000000000000246 R12: 0000000000000009
[ 1209.717461] R13: 0000000000000001 R14: 00007fb5db087620 R15: 0000000000404fe4
[ 1209.718486] Code: 01 00 00 48 39 72 10 49 89 f5 49 89 d4 0f 82 69 01 00 00 48 8b 72 08 45 31 c9 a8 04 49 89 fe 45 89 e8 41 89 f7 75 6f 4c 8b 7a 18 <49> 8b 5f 08 48 29 f3 4c 39 eb 49 0f 47 dd a8 02 0f 85 6e 01 00
[ 1209.721328] RIP: copy_from_iter_full+0x43/0x290 RSP: ffffb7cdc07abc38
[ 1209.723573] CR2: 0000000000000008
[ 1209.725262] ---[ end trace 752756f533c3f533 ]---
[ 1209.726603] Kernel panic - not syncing: Fatal exception
[ 1209.728293] Kernel Offset: 0x2f000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 1209.731225] ---[ end Kernel panic - not syncing: Fatal exception

When we revert the change everything works fine, so we choose this solution for now.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
---
 net/tipc/msg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index a22be50..17201aa 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -268,7 +268,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 		__skb_queue_tail(list, skb);
 		skb_copy_to_linear_data(skb, mhdr, mhsz);
 		pktpos = skb->data + mhsz;
-		if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
+		if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz)
 			return dsz;
 		rc = -EFAULT;
 		goto error;
@@ -299,7 +299,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 		if (drem < pktrem)
 			pktrem = drem;
 
-		if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) {
+		if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) {
 			rc = -EFAULT;
 			goto error;
 		}
-- 
2.7.4

^ permalink raw reply related

* Re: HalfSipHash Acceptable Usage
From: George Spelvin @ 2016-12-22  1:13 UTC (permalink / raw)
  To: linux, tytso
  Cc: ak, davem, David.Laight, djb, ebiggers3, eric.dumazet, hannes,
	Jason, jeanphilippe.aumasson, kernel-hardening, linux-crypto,
	linux-kernel, luto, netdev, tom, torvalds, vegard.nossum
In-Reply-To: <20161221222702.h2vboms776zpgpi4@thunk.org>

As a separate message, to disentangle the threads, I'd like to
talk about get_random_long().

After some thinking, I still like the "state-preserving" construct
that's equivalent to the current MD5 code.  Yes, we could just do
siphash(current_cpu || per_cpu_counter, global_key), but it's nice to
preserve a bit more.

It requires library support from the SipHash code to return the full
SipHash state, but I hope that's a fair thing to ask for.

Here's my current straw man design for comment.  It's very similar to
the current MD5-based design, but feeds all the seed material in the
"correct" way, as opposed to XORing it directly into the MD5 state.

* Each CPU has a (Half)SipHash state vector,
  "unsigned long get_random_int_hash[4]".  Unlike the current
  MD5 code, we take care to initialize it to an asymmetric state.

* There's a global 256-bit random_int_secret (which we could
  reseed periodically).

To generate a random number:
* If get_random_int_hash is all-zero, seed it with a fresh half-sized
  SipHash key and the appropriate XOR constants.
* Generate three words of random_get_entropy(), jiffies, and current->pid.
  (This is arbitrary seed material, copied from the current code.)
* Crank through that with (Half)SipHash-1-0.
* Crank through the random_int_secret with (Half)SipHash-1-0.
* Return v1 ^ v3.

Here are the reasons:
* The first step is just paranoia, but SipHash's security promise depends
  on starting with an asymmetric state, we want unique per-CPU states,
  and it's a one-time cost.
* When the input words are themselves secret, there's no security
  advantage, and almost no speed advantage, to doing two rounds for one
  input word versus two words with one round each.  Thus, SipHash-1.
* The above is not exactly true, due to the before+after XOR pattern
  that SipHash uses, but I think it's true anyway.
* Likewise, there's no benefit to unkeyed finalization rounds over keyed
  ones.  That's why I just enlarged the global secret.
* The per-call seed material is hashed first on general principles,
  because that's the novel part that might have fresh entropy.
* To the extent the initial state is secret, the rounds processing the
  global secret are 4 finalization rounds for the initial state and
  the per-call entropy.
* The final word(s) of the global secret might be vulnerable to analysis,
  due to incomplete mixing, but since the global secret is always hashed
  in the same order, and larger than the desired security level, the
  initial words should be secure.
* By carrying forward the full internal state, we ensure that repeated
  calls return different results, and to the extent that the per-call
  seed material has entropy, it's preserved.
* The final return is all that's needed, since the last steps in the 
  SipRound are "v1 ^= v2" and "v3 ^= v0".  It's no security loss,
  and a very minor speedup.
* Also, this avoids directly "exposing" the final XOR with the last
  word of the global secret (which is made to v0).

If I'm allowed to use full SipHash, some shortcuts can be taken,
but I believe the above would be secure with HalfSipHash.

If additional performance is required, I'd consider shrinking the
global secret to 192 bits on 32-bit machines but I want more than
128 bits of key material, and enough rounds to be equivalent to 4
finalization rounds.

^ permalink raw reply

* Re: [PATCH] ethtool: add one ethtool option to set relax ordering mode
From: Stephen Hemminger @ 2016-12-22  1:27 UTC (permalink / raw)
  To: Mao Wenan; +Cc: netdev, jeffrey.t.kirsher
In-Reply-To: <1481179898-10668-2-git-send-email-maowenan@huawei.com>

On Thu, 8 Dec 2016 14:51:38 +0800
Mao Wenan <maowenan@huawei.com> wrote:

> This patch provides one way to set/unset IXGBE NIC TX and RX
> relax ordering mode, which can be set by ethtool.
> Relax ordering is one mode of 82599 NIC, to enable this mode
> can enhance the performance for some cpu architectures.

Then it should be done by CPU architecture specific quirks (preferably in PCI layer)
so that all users get the option without having to do manual intervention.

> example:
> ethtool -s enp1s0f0 relaxorder off
> ethtool -s enp1s0f0 relaxorder on

Doing it via ethtool is a developer API (for testing) not something that makes
sense in production.

^ permalink raw reply

* [PATCH net] ipvlan: fix multicast processing
From: Mahesh Bandewar @ 2016-12-22  1:30 UTC (permalink / raw)
  To: netdev, Eric Dumazet, David Miller; +Cc: Mahesh Bandewar

From: Mahesh Bandewar <maheshb@google.com>

In an IPvlan setup, multicast processing crashes the kernel when the
master device is set in loopback mode, e.g.

  ethtool -K eth0 set loopback on

  where eth0 is master device for IPvlan setup.

The failure is caused by the faulty logic that determines if the
packet is from TX-path vs. RX-path by just looking at the mac-
addresses on the packet while processing multicast packets.

In the loopback-mode where this crash was happening, the packets
that are sent out are reflected by the NIC and are processed on
the RX path, but the mac-address check tricks the code into thinking this
packet is from the TX path, so it wrongly uses dev_forward_skb() to pass
packets to the slave (virtual) devices.

This patch records the path while queueing packets and eliminates
logic of looking at mac-addresses for the same decision.

------------[ cut here ]------------
kernel BUG at include/linux/skbuff.h:1737!
Call Trace:
 [<ffffffff921fbbc2>] dev_forward_skb+0x92/0xd0
 [<ffffffffc031ac65>] ipvlan_process_multicast+0x395/0x4c0 [ipvlan]
 [<ffffffffc031a9a7>] ? ipvlan_process_multicast+0xd7/0x4c0 [ipvlan]
 [<ffffffff91cdfea7>] ? process_one_work+0x147/0x660
 [<ffffffff91cdff09>] process_one_work+0x1a9/0x660
 [<ffffffff91cdfea7>] ? process_one_work+0x147/0x660
 [<ffffffff91ce086d>] worker_thread+0x11d/0x360
 [<ffffffff91ce0750>] ? rescuer_thread+0x350/0x350
 [<ffffffff91ce960b>] kthread+0xdb/0xe0
 [<ffffffff91c05c70>] ? _raw_spin_unlock_irq+0x30/0x50
 [<ffffffff91ce9530>] ? flush_kthread_worker+0xc0/0xc0
 [<ffffffff92348b7a>] ret_from_fork+0x9a/0xd0
 [<ffffffff91ce9530>] ? flush_kthread_worker+0xc0/0xc0

Fixes: ba35f8588f47 ("ipvlan: Defer multicast / broadcast processing to a work-queue")
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
CC: Eric Dumazet <edumazet@google.com>
---
Note that this is on top of Eric's patch sent earlier.

 drivers/net/ipvlan/ipvlan.h      |  5 +++++
 drivers/net/ipvlan/ipvlan_core.c | 26 +++++++++++++++-----------
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 031093e1c25f..dbfbb33ac66c 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -99,6 +99,11 @@ struct ipvl_port {
 	int			count;
 };
 
+struct ipvl_skb_cb {
+	bool tx_pkt;
+};
+#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0]))
+
 static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
 {
 	return rcu_dereference(d->rx_handler_data);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index ea6bc1e12cdf..83ce74acf82d 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -198,7 +198,7 @@ void ipvlan_process_multicast(struct work_struct *work)
 	unsigned int mac_hash;
 	int ret;
 	u8 pkt_type;
-	bool hlocal, dlocal;
+	bool tx_pkt;
 
 	__skb_queue_head_init(&list);
 
@@ -211,7 +211,7 @@ void ipvlan_process_multicast(struct work_struct *work)
 		bool consumed = false;
 
 		ethh = eth_hdr(skb);
-		hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr);
+		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
 		mac_hash = ipvlan_mac_hash(ethh->h_dest);
 
 		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
@@ -219,13 +219,10 @@ void ipvlan_process_multicast(struct work_struct *work)
 		else
 			pkt_type = PACKET_MULTICAST;
 
-		dlocal = false;
 		rcu_read_lock();
 		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
-			if (hlocal && (ipvlan->dev == dev)) {
-				dlocal = true;
+			if (tx_pkt && (ipvlan->dev == skb->dev))
 				continue;
-			}
 			if (!test_bit(mac_hash, ipvlan->mac_filters))
 				continue;
 			if (!(ipvlan->dev->flags & IFF_UP))
@@ -238,7 +235,7 @@ void ipvlan_process_multicast(struct work_struct *work)
 				consumed = true;
 				nskb->pkt_type = pkt_type;
 				nskb->dev = ipvlan->dev;
-				if (hlocal)
+				if (tx_pkt)
 					ret = dev_forward_skb(ipvlan->dev, nskb);
 				else
 					ret = netif_rx(nskb);
@@ -248,7 +245,7 @@ void ipvlan_process_multicast(struct work_struct *work)
 		}
 		rcu_read_unlock();
 
-		if (dlocal) {
+		if (tx_pkt) {
 			/* If the packet originated here, send it out. */
 			skb->dev = port->dev;
 			skb->pkt_type = pkt_type;
@@ -480,13 +477,20 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 }
 
 static void ipvlan_multicast_enqueue(struct ipvl_port *port,
-				     struct sk_buff *skb)
+				     struct sk_buff *skb, bool tx_pkt)
 {
 	if (skb->protocol == htons(ETH_P_PAUSE)) {
 		kfree_skb(skb);
 		return;
 	}
 
+	/* Record that the deferred packet is from TX or RX path. By
+	 * looking at mac-addresses on packet will lead to erronus decisions.
+	 * (This would be true for a loopback-mode on master device or a
+	 * hair-pin mode of the switch.)
+	 */
+	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
+
 	spin_lock(&port->backlog.lock);
 	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
 		if (skb->dev)
@@ -549,7 +553,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 
 	} else if (is_multicast_ether_addr(eth->h_dest)) {
 		ipvlan_skb_crossing_ns(skb, NULL);
-		ipvlan_multicast_enqueue(ipvlan->port, skb);
+		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
 		return NET_XMIT_SUCCESS;
 	}
 
@@ -646,7 +650,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 			 */
 			if (nskb) {
 				ipvlan_skb_crossing_ns(nskb, NULL);
-				ipvlan_multicast_enqueue(port, nskb);
+				ipvlan_multicast_enqueue(port, nskb, false);
 			}
 		}
 	} else {
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related

* RE: [PATCH] ethtool: add one ethtool option to set relax ordering mode
From: maowenan @ 2016-12-22  1:39 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: netdev@vger.kernel.org, jeffrey.t.kirsher@intel.com,
	weiyongjun (A), Dingtianhong
In-Reply-To: <20161221172759.1bc0d0dd@xeon-e3>



> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, December 22, 2016 9:28 AM
> To: maowenan
> Cc: netdev@vger.kernel.org; jeffrey.t.kirsher@intel.com
> Subject: Re: [PATCH] ethtool: add one ethtool option to set relax ordering mode
> 
> On Thu, 8 Dec 2016 14:51:38 +0800
> Mao Wenan <maowenan@huawei.com> wrote:
> 
> > This patch provides one way to set/unset IXGBE NIC TX and RX relax
> > ordering mode, which can be set by ethtool.
> > Relax ordering is one mode of 82599 NIC, to enable this mode can
> > enhance the performance for some cpu architectures.
> 
> Then it should be done by CPU architecture specific quirks (preferably in PCI
> layer) so that all users get the option without having to do manual intervention.
> 
> > example:
> > ethtool -s enp1s0f0 relaxorder off
> > ethtool -s enp1s0f0 relaxorder on
> 
> Doing it via ethtool is a developer API (for testing) not something that makes
> sense in production.


This feature is not mandatory for all users; actually, the default relax ordering configuration of the 82599 is 'disable',
so this patch provides a way to enable relax ordering in setups where it improves performance.

^ permalink raw reply

* Re: ipv6: handle -EFAULT from skb_copy_bits
From: Dave Jones @ 2016-12-22  1:40 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: David Miller, xiyou.wangcong, netdev
In-Reply-To: <1482356000.2260.13.camel@stressinduktion.org>

On Wed, Dec 21, 2016 at 10:33:20PM +0100, Hannes Frederic Sowa wrote:

 > > Given all of this, I think the best thing to do is validate the offset
 > > after the queue walks, which is pretty much what Dave Jones's original
 > > patch was doing.
 > 
 > I think both approaches protect against the bug reasonably well, but
 > Dave's patch has a bug: we must either call ip6_flush_pending_frames to
 > clear the socket write queue with the buggy send request.

I can fix that up and resubmit, or we can go with your approach.
DaveM ?

	Dave

^ permalink raw reply

* Re: [PATCH v7 1/6] siphash: add cryptographically secure PRF
From: Stephen Hemminger @ 2016-12-22  1:40 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Netdev, kernel-hardening, LKML, linux-crypto, David Laight,
	Ted Tso, Hannes Frederic Sowa, edumazet, Linus Torvalds,
	Eric Biggers, Tom Herbert, ak, davem, luto,
	Jean-Philippe Aumasson, Eric Dumazet
In-Reply-To: <20161221230216.25341-2-Jason@zx2c4.com>

On Thu, 22 Dec 2016 00:02:11 +0100
"Jason A. Donenfeld" <Jason@zx2c4.com> wrote:

> SipHash is a 64-bit keyed hash function that is actually a
> cryptographically secure PRF, like HMAC. Except SipHash is super fast,
> and is meant to be used as a hashtable keyed lookup function, or as a
> general PRF for short input use cases, such as sequence numbers or RNG
> chaining.
> 
> For the first usage:
> 
> There are a variety of attacks known as "hashtable poisoning" in which an
> attacker forms some data such that the hash of that data will be the
> same, and then proceeds to fill up all entries of a hashbucket. This is
> a realistic and well-known denial-of-service vector. Currently
> hashtables use jhash, which is fast but not secure, and some kind of
> rotating key scheme (or none at all, which isn't good). SipHash is meant
> as a replacement for jhash in these cases.
> 
> There are a modicum of places in the kernel that are vulnerable to
> hashtable poisoning attacks, either via userspace vectors or network
> vectors, and there's not a reliable mechanism inside the kernel at the
> moment to fix it. The first step toward fixing these issues is actually
> getting a secure primitive into the kernel for developers to use. Then
> we can, bit by bit, port things over to it as deemed appropriate.
> 
> While SipHash is extremely fast for a cryptographically secure function,
> it is likely a bit slower than the insecure jhash, and so replacements
> will be evaluated on a case-by-case basis based on whether or not the
> difference in speed is negligible and whether or not the current jhash usage
> poses a real security risk.
> 
> For the second usage:
> 
> A few places in the kernel are using MD5 or SHA1 for creating secure
> sequence numbers, syn cookies, port numbers, or fast random numbers.
> SipHash is a faster, more fitting, and more secure replacement for MD5
> in those situations. Replacing MD5 and SHA1 with SipHash for these uses is
> obvious and straight-forward, and so is submitted along with this patch
> series. There shouldn't be much of a debate over its efficacy.
> 
> Dozens of languages are already using this internally for their hash
> tables and PRFs. Some of the BSDs already use this in their kernels.
> SipHash is a widely known high-speed solution to a widely known set of
> problems, and it's time we catch up.
> 
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Eric Biggers <ebiggers3@gmail.com>
> Cc: David Laight <David.Laight@aculab.com>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>

The networking tree (net-next) which is where you are submitting to is technically
closed right now.

^ permalink raw reply

* Re: [PATCH v7 1/6] siphash: add cryptographically secure PRF
From: Jason A. Donenfeld @ 2016-12-22  1:42 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Netdev, kernel-hardening, LKML, Linux Crypto Mailing List,
	David Laight, Ted Tso, Hannes Frederic Sowa, Eric Dumazet,
	Linus Torvalds, Eric Biggers, Tom Herbert, Andi Kleen,
	David Miller, Andy Lutomirski, Jean-Philippe Aumasson,
	Eric Dumazet

On Thu, Dec 22, 2016 at 2:40 AM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> The networking tree (net-next) which is where you are submitting to is technically
> closed right now.

That's okay. At some point in the future it will be open. By then v83
of this patch set will be shiny and done, just waiting for the merge
window to open. There's a lot to discuss with this, so getting the
feedback early is beneficial.

Jason

^ permalink raw reply

* Re: [PATCH net 1/1] tipc: revert use of copy_from_iter_full()
From: Al Viro @ 2016-12-22  1:43 UTC (permalink / raw)
  To: Jon Maloy
  Cc: davem, netdev, parthasarathy.bhuvaragan, ying.xue, maloy,
	tipc-discussion
In-Reply-To: <20161222012101.GF1555@ZenIV.linux.org.uk>

On Thu, Dec 22, 2016 at 01:21:01AM +0000, Al Viro wrote:
> On Wed, Dec 21, 2016 at 08:01:37PM -0500, Jon Maloy wrote:
> > commit cbbd26b8b1a6 ("[iov_iter] new primitives - copy_from_iter_full()
> > and friends") replaced calls to copy_from_iter() in the function
> > tipc_msg_build(). This causes an immediate crash as follows:
> 
> Very interesting.
> 
> > [ 1209.597076] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
> > [ 1209.607025] IP: copy_from_iter_full+0x43/0x290
> 
> > [ 1209.689257]  tipc_msg_build+0xe1/0x590 [tipc]
> > [ 1209.691479]  ? _raw_spin_unlock_bh+0x1e/0x20
> > [ 1209.694641]  ? tipc_node_find+0x30/0xa0 [tipc]
> > [ 1209.696789]  __tipc_sendmsg+0x189/0x480 [tipc]
> > [ 1209.699017]  ? remove_wait_queue+0x4d/0x60
> > [ 1209.700354]  tipc_connect+0x15f/0x1b0 [tipc]
> > [ 1209.701684]  SYSC_connect+0xd9/0x110
> 
> I don't believe that it's something tipc-specific; could you post an objdump
> of copy_from_iter_full() in your kernel?  That smells like a bug in there
> and it really ought to be fixed...

FWIW, looking at the tipc, am I reading the trace correctly?  We seem to
have tipc_connect() taking an msghdr with empty payload and hitting this
        switch (sk->sk_state) {
        case TIPC_OPEN:
                /* Send a 'SYN-' to destination */
                m.msg_name = dest;
                m.msg_namelen = destlen;

                /* If connect is in non-blocking case, set MSG_DONTWAIT to
                 * indicate send_msg() is never blocked.
                 */
                if (!timeout)
                        m.msg_flags = MSG_DONTWAIT;

                res = __tipc_sendmsg(sock, &m, 0);
which eventually calls
        rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
possibly more than once, but with explicit "restore m->msg_iter to what
it was before the first call" before each subsequent call.

What's putting anything into m.msg_iter on that codepath?  AFAICS, it should
be completely empty...  Wait.  AAARRRGH!

OK, I see what's going on there - unlike iterate_and_advance(), which
explicitly skips any work in case of empty iterator, iterate_all_kind()
does not.  Could you check if the following fixes your problem?

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 228892dabba6..6a0396b8d47f 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -73,19 +73,21 @@
 }
 
 #define iterate_all_kinds(i, n, v, I, B, K) {			\
-	size_t skip = i->iov_offset;				\
-	if (unlikely(i->type & ITER_BVEC)) {			\
-		struct bio_vec v;				\
-		struct bvec_iter __bi;				\
-		iterate_bvec(i, n, v, __bi, skip, (B))		\
-	} else if (unlikely(i->type & ITER_KVEC)) {		\
-		const struct kvec *kvec;			\
-		struct kvec v;					\
-		iterate_kvec(i, n, v, kvec, skip, (K))		\
-	} else {						\
-		const struct iovec *iov;			\
-		struct iovec v;					\
-		iterate_iovec(i, n, v, iov, skip, (I))		\
+	if (i->count) {						\
+		size_t skip = i->iov_offset;			\
+		if (unlikely(i->type & ITER_BVEC)) {		\
+			struct bio_vec v;			\
+			struct bvec_iter __bi;			\
+			iterate_bvec(i, n, v, __bi, skip, (B))	\
+		} else if (unlikely(i->type & ITER_KVEC)) {	\
+			const struct kvec *kvec;		\
+			struct kvec v;				\
+			iterate_kvec(i, n, v, kvec, skip, (K))	\
+		} else {					\
+			const struct iovec *iov;		\
+			struct iovec v;				\
+			iterate_iovec(i, n, v, iov, skip, (I))	\
+		}						\
 	}							\
 }
 

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox