Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next] bpf: map_get_next_key to return first key on NULL
From: Alexei Starovoitov @ 2017-04-25  2:00 UTC (permalink / raw)
  To: David S . Miller; +Cc: Daniel Borkmann, Teng Qin, netdev, kernel-team

From: Teng Qin <qinteng@fb.com>

When iterating through a map, we need to find a key that does not exist
in the map so map_get_next_key will give us the first key of the map.
This often requires a lot of guessing in production systems.

This patch makes map_get_next_key return the first key when the key
pointer in the parameter is NULL.

Signed-off-by: Teng Qin <qinteng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/trace/events/bpf.h              | 10 +++++++---
 kernel/bpf/arraymap.c                   |  2 +-
 kernel/bpf/hashtab.c                    |  9 +++++----
 kernel/bpf/syscall.c                    | 20 ++++++++++++--------
 tools/testing/selftests/bpf/test_maps.c | 29 +++++++++++++++++++++++++----
 5 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index c3a53fd47ff1..52c8425d144b 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -321,11 +321,14 @@ TRACE_EVENT(bpf_map_next_key,
 		__dynamic_array(u8, key, map->key_size)
 		__dynamic_array(u8, nxt, map->key_size)
 		__field(bool, key_trunc)
+		__field(bool, key_null)
 		__field(int, ufd)
 	),
 
 	TP_fast_assign(
-		memcpy(__get_dynamic_array(key), key, map->key_size);
+		if (key)
+			memcpy(__get_dynamic_array(key), key, map->key_size);
+		__entry->key_null = !key;
 		memcpy(__get_dynamic_array(nxt), key_next, map->key_size);
 		__entry->type      = map->map_type;
 		__entry->key_len   = min(map->key_size, 16U);
@@ -336,8 +339,9 @@ TRACE_EVENT(bpf_map_next_key,
 	TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]",
 		  __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB),
 		  __entry->ufd,
-		  __print_hex(__get_dynamic_array(key), __entry->key_len),
-		  __entry->key_trunc ? " ..." : "",
+		  __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key),
+							   __entry->key_len),
+		  __entry->key_trunc && !__entry->key_null ? " ..." : "",
 		  __print_hex(__get_dynamic_array(nxt), __entry->key_len),
 		  __entry->key_trunc ? " ..." : "")
 );
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index ec621df5a97a..5e00b2333c26 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -182,7 +182,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	u32 index = *(u32 *)key;
+	u32 index = key ? *(u32 *)key : U32_MAX;
 	u32 *next = (u32 *)next_key;
 
 	if (index >= array->map.max_entries) {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index bc80c038e430..004334ea13ba 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -540,12 +540,15 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	struct hlist_nulls_head *head;
 	struct htab_elem *l, *next_l;
 	u32 hash, key_size;
-	int i;
+	int i = 0;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	key_size = map->key_size;
 
+	if (!key)
+		goto find_first_elem;
+
 	hash = htab_map_hash(key, key_size);
 
 	head = select_bucket(htab, hash);
@@ -553,10 +556,8 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	/* lookup the key */
 	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
-	if (!l) {
-		i = 0;
+	if (!l)
 		goto find_first_elem;
-	}
 
 	/* key was found, get next key in the same bucket */
 	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b89288e2b589..13642c73dca0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -536,14 +536,18 @@ static int map_get_next_key(union bpf_attr *attr)
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 
-	err = -ENOMEM;
-	key = kmalloc(map->key_size, GFP_USER);
-	if (!key)
-		goto err_put;
-
-	err = -EFAULT;
-	if (copy_from_user(key, ukey, map->key_size) != 0)
-		goto free_key;
+	if (ukey) {
+		err = -ENOMEM;
+		key = kmalloc(map->key_size, GFP_USER);
+		if (!key)
+			goto err_put;
+
+		err = -EFAULT;
+		if (copy_from_user(key, ukey, map->key_size) != 0)
+			goto free_key;
+	} else {
+		key = NULL;
+	}
 
 	err = -ENOMEM;
 	next_key = kmalloc(map->key_size, GFP_USER);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 20f1871874df..a977c4f7b0ce 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -28,7 +28,7 @@ static int map_flags;
 
 static void test_hashmap(int task, void *data)
 {
-	long long key, next_key, value;
+	long long key, next_key, first_key, value;
 	int fd;
 
 	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
@@ -89,10 +89,13 @@ static void test_hashmap(int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+	       (first_key == 1 || first_key == 2));
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
-	       (next_key == 1 || next_key == 2));
+	       (next_key == first_key));
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
-	       (next_key == 1 || next_key == 2));
+	       (next_key == 1 || next_key == 2) &&
+	       (next_key != first_key));
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
 	       errno == ENOENT);
 
@@ -105,6 +108,8 @@ static void test_hashmap(int task, void *data)
 
 	key = 0;
 	/* Check that map is empty. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	       errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
 	       errno == ENOENT);
 
@@ -133,7 +138,7 @@ static void test_hashmap_percpu(int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	long long value[nr_cpus];
-	long long key, next_key;
+	long long key, next_key, first_key;
 	int expected_key_mask = 0;
 	int fd, i;
 
@@ -193,7 +198,13 @@ static void test_hashmap_percpu(int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+	       ((expected_key_mask & first_key) == first_key));
 	while (!bpf_map_get_next_key(fd, &key, &next_key)) {
+		if (first_key) {
+			assert(next_key == first_key);
+			first_key = 0;
+		}
 		assert((expected_key_mask & next_key) == next_key);
 		expected_key_mask &= ~next_key;
 
@@ -219,6 +230,8 @@ static void test_hashmap_percpu(int task, void *data)
 
 	key = 0;
 	/* Check that map is empty. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	       errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
 	       errno == ENOENT);
 
@@ -264,6 +277,8 @@ static void test_arraymap(int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
 	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
@@ -319,6 +334,8 @@ static void test_arraymap_percpu(int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
 	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
@@ -400,6 +417,8 @@ static void test_map_large(void)
 	       errno == E2BIG);
 
 	/* Iterate through all elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+	key.c = -1;
 	for (i = 0; i < MAP_SIZE; i++)
 		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
 	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
@@ -499,6 +518,7 @@ static void test_map_parallel(void)
 	       errno == EEXIST);
 
 	/* Check that all elements were inserted. */
+	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
 	key = -1;
 	for (i = 0; i < MAP_SIZE; i++)
 		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
@@ -518,6 +538,7 @@ static void test_map_parallel(void)
 
 	/* Nothing should be left. */
 	key = -1;
+	assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
 }
 
-- 
2.9.3

^ permalink raw reply related

* Re: [PATCH net-next 0/2] flower: add MPLS matching support
From: Jamal Hadi Salim @ 2017-04-25  2:06 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: David Miller, benjamin.lahaise, netdev, bcrl, Jiri Pirko
In-Reply-To: <e58fd7ee-1bd0-0cd5-ab15-c510ec4e70bd@mojatatu.com>

On 17-04-24 10:00 PM, Jamal Hadi Salim wrote:
> On 17-04-24 09:48 PM, Jamal Hadi Salim wrote:

>
> Hrm. maybe I am wrong.
> Lets say user sets all of the 8 bits in BOS,
> what does setting
> key_val->mpls_bos = nla_get_u8 do?
>
> Same with the 20 bits for the label in the u32
> or 3 bit bits in the u8 tc.

The label and tc are masked - maybe just the BOS
needs something similar?

cheers,
jamal

^ permalink raw reply

* Re: [PATCH net-next 0/2] flower: add MPLS matching support
From: Jakub Kicinski @ 2017-04-25  2:07 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: David Miller, benjamin.lahaise, netdev, bcrl, Jiri Pirko
In-Reply-To: <8f6e7952-b87b-8959-b45a-ab687b69fb1b@mojatatu.com>

On Mon, 24 Apr 2017 22:06:08 -0400, Jamal Hadi Salim wrote:
> On 17-04-24 10:00 PM, Jamal Hadi Salim wrote:
> > On 17-04-24 09:48 PM, Jamal Hadi Salim wrote:  
> 
> >
> > Hrm. maybe I am wrong.
> > Lets say user sets all of the 8 bits in BOS,
> > what does setting
> > key_val->mpls_bos = nla_get_u8 do?
> >
> > Same with the 20 bits for the label in the u32
> > or 3 bit bits in the u8 tc.  
> 
> The label and tc are masked - maybe just the BOS
> needs something similar?

Indeed, good catch!

^ permalink raw reply

* Re: [PATCH v2] net: core: Prevent from dereferencing null pointer when
From: Myungho Jung @ 2017-04-25  2:39 UTC (permalink / raw)
  To: David Miller; +Cc: edumazet, netdev
In-Reply-To: <20170424.214450.2215530406378773476.davem@davemloft.net>

On Mon, Apr 24, 2017 at 09:44:50PM -0400, David Miller wrote:
> From: Myungho Jung <mhjungk@gmail.com>
> Date: Mon, 24 Apr 2017 18:00:52 -0700
> 
> > On Mon, Apr 24, 2017 at 12:02:35PM -0400, David Miller wrote:
> >> From: Myungho Jung <mhjungk@gmail.com>
> >> Date: Thu, 20 Apr 2017 16:59:20 -0700
> >> 
> >> > Added NULL check to make __dev_kfree_skb_irq consistent with kfree
> >> > family of functions.
> >> > 
> >> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=195289
> >> > 
> >> > Signed-off-by: Myungho Jung <mhjungk@gmail.com>
> >> > ---
> >> > Changes in v2:
> >> >  - Correct category in subject
> >> 
> >> This subject line is an incomplete sentence.
> >> 
> >> This patch prevents dereferenccing a null pointer when "what"?
> > 
> > As it was reported on bugzilla, it would happen when plugging p54 usb device
> > to RPi2. But, i'm not 100% sure that this patch will resolve the issue because
> > the reporter didn't try my patch yet and I don't have the device to test.
> > 
> > And there might be some other places calling the function without checking
> > null pointer. The thing is that only the function don't check null among
> > kfree functions. So, I just hope this patch will prevent potential oops
> > issues.
> 
> It doesn't check for a NULL pointer because it is almost exclusively
> used in the interrupt paths where we know we have a non-NULL skb.

Yes, actually null is checked before calling the function in most
cases. That's why my first patch was applied not to net/core but to
p54 driver because I was worried about duplicated checking.

But, Christian suggested this patch to make it consistent with other
kfree functions and consume_skb, and Eric agreed with that.

Thanks,
Myungho

^ permalink raw reply

* [PATCH net-next] sparc64: Improve 64-bit constant loading in eBPF JIT.
From: David Miller @ 2017-04-25  2:53 UTC (permalink / raw)
  To: netdev; +Cc: sparclinux, ast, daniel


Doing a full 64-bit decomposition is really stupid especially for
simple values like 0 and -1.

But if we are going to optimize this, go all the way and try for all 2
and 3 instruction sequences not requiring a temporary register as
well.

Signed-off-by: David S. Miller <davem@davemloft.net>
---

This one is dedicated to all the bit twiddlers out there.

First we do the easy cases where it's a zero or sign extended
32-bit number (sethi+or, sethi+xor, respectively).

Then we try to find a range of set bits we can load simply then shift
up into place, in various ways.

Then we try negating the constant and see if we can do a simple
sequence using that with a xor at the end.  (f.e. the range of set
bits can't be loaded simply, but for the negated value it can)

The final optimized strategy involves 4 instructions sequences not
needing a temporary register.

Otherwise we sadly fully decompose using a temp..

Example, from ALU64_XOR_K: 0x0000ffffffff0000 ^ 0x0 = 0x0000ffffffff0000:

0000000000000000 <foo>:
   0:   9d e3 bf 50     save  %sp, -176, %sp
   4:   01 00 00 00     nop
   8:   90 10 00 18     mov  %i0, %o0
   c:   13 3f ff ff     sethi  %hi(0xfffffc00), %o1
  10:   92 12 63 ff     or  %o1, 0x3ff, %o1     ! ffffffff <foo+0xffffffff>
  14:   93 2a 70 10     sllx  %o1, 0x10, %o1
  18:   15 3f ff ff     sethi  %hi(0xfffffc00), %o2
  1c:   94 12 a3 ff     or  %o2, 0x3ff, %o2     ! ffffffff <foo+0xffffffff>
  20:   95 2a b0 10     sllx  %o2, 0x10, %o2
  24:   92 1a 60 00     xor  %o1, 0, %o1
  28:   12 e2 40 8a     cxbe  %o1, %o2, 38 <foo+0x38>
  2c:   9a 10 20 02     mov  2, %o5
  30:   10 60 00 03     b,pn   %xcc, 3c <foo+0x3c>
  34:   01 00 00 00     nop
  38:   9a 10 20 01     mov  1, %o5     ! 1 <foo+0x1>
  3c:   81 c7 e0 08     ret
  40:   91 eb 40 00     restore  %o5, %g0, %o0

Last optimization tonight, I promise :-)

 arch/sparc/net/bpf_jit_comp_64.c | 249 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 245 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 2b2f3c3..6869ec0 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -28,6 +28,11 @@ static inline bool is_simm5(unsigned int value)
 	return value + 0x10 < 0x20;
 }
 
+static inline bool is_sethi(unsigned int value)
+{
+	return (value & ~0x3fffff) == 0;
+}
+
 static void bpf_flush_icache(void *start_, void *end_)
 {
 	/* Cheetah's I-cache is fully coherent.  */
@@ -367,16 +372,252 @@ static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
 	}
 }
 
+static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
+				   int *hbsp, int *lbsp, int *abbasp)
+{
+	int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
+	int i;
+
+	lowest_bit_set = highest_bit_set = -1;
+	i = 0;
+	do {
+		if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
+			lowest_bit_set = i;
+		if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
+			highest_bit_set = (64 - i - 1);
+	}  while (++i < 32 && ((highest_bit_set == -1) ||
+			       (lowest_bit_set == -1)));
+	if (i == 32) {
+		i = 0;
+		do {
+			if ((lowest_bit_set == -1) && ((high_bits >> i) & 1))
+				lowest_bit_set = i + 32;
+			if ((highest_bit_set == -1) &&
+			    ((low_bits >> (32 - i - 1)) & 1))
+				highest_bit_set = 32 - i - 1;
+		} while (++i < 32 && ((highest_bit_set == -1)
+				      || (lowest_bit_set == -1)));
+	}
+
+	all_bits_between_are_set = 1;
+	for (i = lowest_bit_set; i <= highest_bit_set; i++) {
+		if (i < 32) {
+			if ((low_bits & (1 << i)) != 0)
+				continue;
+		} else {
+			if ((high_bits & (1 << (i - 32))) != 0)
+				continue;
+		}
+		all_bits_between_are_set = 0;
+		break;
+	}
+	*hbsp = highest_bit_set;
+	*lbsp = lowest_bit_set;
+	*abbasp = all_bits_between_are_set;
+}
+
+static unsigned long create_simple_focus_bits(unsigned long high_bits,
+					      unsigned long low_bits,
+					      int lowest_bit_set, int shift)
+{
+	long hi, lo;
+
+	if (lowest_bit_set < 32) {
+		lo = (low_bits >> lowest_bit_set) << shift;
+		hi = ((high_bits << (32 - lowest_bit_set)) << shift);
+	} else {
+		lo = 0;
+		hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
+	}
+	return hi | lo;
+}
+
+static bool const64_is_2insns (unsigned long high_bits,
+			       unsigned long low_bits)
+{
+	int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
+
+	if (high_bits == 0 || high_bits == 0xffffffff)
+		return true;
+
+	analyze_64bit_constant (high_bits, low_bits,
+				&highest_bit_set, &lowest_bit_set,
+				&all_bits_between_are_set);
+
+	if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
+	    all_bits_between_are_set != 0)
+		return true;
+
+	if ((highest_bit_set - lowest_bit_set) < 21)
+		return true;
+
+	return false;
+}
+
+static void sparc_emit_set_const64_quick2 (unsigned long high_bits,
+					   unsigned long low_imm,
+					   unsigned int dest,
+					   int shift_count, struct jit_ctx *ctx)
+{
+	emit_loadimm32(high_bits, dest, ctx);
+
+	/* Now shift it up into place.  */
+	emit_alu_K(SLLX, dest, shift_count, ctx);
+
+	/* If there is a low immediate part piece, finish up by
+	 * putting that in as well.
+	 */
+	if (low_imm != 0)
+		emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
+}
+
 static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
 {
+	int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
 	unsigned int tmp = bpf2sparc[TMP_REG_1];
-	u32 high_part = (K >> 32);
-	u32 low_part = (K & 0xffffffff);
+	u32 low_bits = (K & 0xffffffff);
+	u32 high_bits = (K >> 32);
+
+	/* These two tests also take care of all of the one
+	 * instruction cases.
+	 */
+	if (high_bits == 0xffffffff && (low_bits & 0x80000000))
+		return emit_loadimm_sext(K, dest, ctx);
+	if (high_bits == 0x00000000)
+		return emit_loadimm32(K, dest, ctx);
+
+	analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
+			       &lowest_bit_set, &all_bits_between_are_set);
+
+	/* 1) mov	-1, %reg
+	 *    sllx	%reg, shift, %reg
+	 * 2) mov	-1, %reg
+	 *    srlx	%reg, shift, %reg
+	 * 3) mov	some_small_const, %reg
+	 *    sllx	%reg, shift, %reg
+	 */
+	if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
+	     all_bits_between_are_set != 0) ||
+	    ((highest_bit_set - lowest_bit_set) < 12)) {
+		int shift = lowest_bit_set;
+		long the_const = -1;
+
+		if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
+		    all_bits_between_are_set == 0) {
+			the_const =
+				create_simple_focus_bits(high_bits, low_bits,
+							 lowest_bit_set, 0);
+		} else if (lowest_bit_set == 0)
+			shift = -(63 - highest_bit_set);
+
+		emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
+		if (shift > 0)
+			emit_alu_K(SLLX, dest, shift, ctx);
+		else if (shift < 0)
+			emit_alu_K(SRLX, dest, -shift, ctx);
+
+		return;
+	}
+
+	/* Now a range of 22 or less bits set somewhere.
+	 * 1) sethi	%hi(focus_bits), %reg
+	 *    sllx	%reg, shift, %reg
+	 * 2) sethi	%hi(focus_bits), %reg
+	 *    srlx	%reg, shift, %reg
+	 */
+	if ((highest_bit_set - lowest_bit_set) < 21) {
+		unsigned long focus_bits =
+			create_simple_focus_bits(high_bits, low_bits,
+						 lowest_bit_set, 10);
+
+		emit(SETHI(focus_bits, dest), ctx);
+
+		/* If lowest_bit_set == 10 then a sethi alone could
+		 * have done it.
+		 */
+		if (lowest_bit_set < 10)
+			emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
+		else if (lowest_bit_set > 10)
+			emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
+		return;
+	}
+
+	/* Ok, now 3 instruction sequences.  */
+	if (low_bits == 0) {
+		emit_loadimm32(high_bits, dest, ctx);
+		emit_alu_K(SLLX, dest, 32, ctx);
+		return;
+	}
+
+	/* We may be able to do something quick
+	 * when the constant is negated, so try that.
+	 */
+	if (const64_is_2insns ((~high_bits) & 0xffffffff,
+			       (~low_bits) & 0xfffffc00)) {
+		/* NOTE: The trailing bits get XOR'd so we need the
+		 * non-negated bits, not the negated ones.
+		 */
+		unsigned long trailing_bits = low_bits & 0x3ff;
+
+		if ((((~high_bits) & 0xffffffff) == 0 &&
+		     ((~low_bits) & 0x80000000) == 0) ||
+		    (((~high_bits) & 0xffffffff) == 0xffffffff &&
+		     ((~low_bits) & 0x80000000) != 0)) {
+			unsigned long fast_int = (~low_bits & 0xffffffff);
+
+			if ((is_sethi(fast_int) &&
+			     (~high_bits & 0xffffffff) == 0)) {
+				emit(SETHI(fast_int, dest), ctx);
+			} else if (is_simm13(fast_int)) {
+				emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
+			} else {
+				emit_loadimm64(fast_int, dest, ctx);
+			}
+		} else {
+			u64 n = ((~low_bits) & 0xfffffc00) |
+				(((unsigned long)((~high_bits) & 0xffffffff))<<32);
+			emit_loadimm64(n, dest, ctx);
+		}
+
+		low_bits = -0x400 | trailing_bits;
+
+		emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
+		return;
+	}
+
+	/* 1) sethi	%hi(xxx), %reg
+	 *    or	%reg, %lo(xxx), %reg
+	 *    sllx	%reg, yyy, %reg
+	 */
+	if ((highest_bit_set - lowest_bit_set) < 32) {
+		unsigned long focus_bits =
+			create_simple_focus_bits(high_bits, low_bits,
+						 lowest_bit_set, 0);
+
+		/* So what we know is that the set bits straddle the
+		 * middle of the 64-bit word.
+		 */
+		sparc_emit_set_const64_quick2(focus_bits, 0, dest,
+					      lowest_bit_set, ctx);
+		return;
+	}
+
+	/* 1) sethi	%hi(high_bits), %reg
+	 *    or	%reg, %lo(high_bits), %reg
+	 *    sllx	%reg, 32, %reg
+	 *    or	%reg, low_bits, %reg
+	 */
+	if (is_simm13(low_bits) && ((int)low_bits > 0)) {
+		sparc_emit_set_const64_quick2(high_bits, low_bits,
+					      dest, 32, ctx);
+		return;
+	}
 
+	/* Oh well, we tried... Do a full 64-bit decomposition.  */
 	ctx->tmp_1_used = true;
 
-	emit_set_const(high_part, tmp, ctx);
-	emit_set_const(low_part, dest, ctx);
+	emit_loadimm32(high_bits, tmp, ctx);
+	emit_loadimm32(low_bits, dest, ctx);
 	emit_alu_K(SLLX, tmp, 32, ctx);
 	emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
 }
-- 
2.1.2.532.g19b5d50

^ permalink raw reply related

* Re: [PATCH net-next] sparc64: Improve 64-bit constant loading in eBPF JIT.
From: Alexei Starovoitov @ 2017-04-25  3:18 UTC (permalink / raw)
  To: David Miller, netdev; +Cc: sparclinux, daniel
In-Reply-To: <20170424.225334.898469015162314835.davem@davemloft.net>

On 4/24/17 7:53 PM, David Miller wrote:
>
> Doing a full 64-bit decomposition is really stupid especially for
> simple values like 0 and -1.
>
> But if we are going to optimize this, go all the way and try for all 2
> and 3 instruction sequences not requiring a temporary register as
> well.
>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> ---
>
> This one is dedicated to all the bit twiddlers out there.
>
> First we do the easy cases where it's a zero or sign extended
> 32-bit number (sethi+or, sethi+xor, respectively).
>
> Then we try to find a range of set bits we can load simply then shift
> up into place, in various ways.
>
> Then we try negating the constant and see if we can do a simple
> sequence using that with a xor at the end.  (f.e. the range of set
> bits can't be loaded simply, but for the negated value it can)
>
> The final optimized strategy involves 4 instructions sequences not
> needing a temporary register.
>
> Otherwise we sadly fully decompose using a temp..
>
> Example, from ALU64_XOR_K: 0x0000ffffffff0000 ^ 0x0 = 0x0000ffffffff0000:
>
> 0000000000000000 <foo>:
>    0:   9d e3 bf 50     save  %sp, -176, %sp
>    4:   01 00 00 00     nop
>    8:   90 10 00 18     mov  %i0, %o0
>    c:   13 3f ff ff     sethi  %hi(0xfffffc00), %o1
>   10:   92 12 63 ff     or  %o1, 0x3ff, %o1     ! ffffffff <foo+0xffffffff>
>   14:   93 2a 70 10     sllx  %o1, 0x10, %o1
>   18:   15 3f ff ff     sethi  %hi(0xfffffc00), %o2
>   1c:   94 12 a3 ff     or  %o2, 0x3ff, %o2     ! ffffffff <foo+0xffffffff>
>   20:   95 2a b0 10     sllx  %o2, 0x10, %o2
>   24:   92 1a 60 00     xor  %o1, 0, %o1
>   28:   12 e2 40 8a     cxbe  %o1, %o2, 38 <foo+0x38>
>   2c:   9a 10 20 02     mov  2, %o5
>   30:   10 60 00 03     b,pn   %xcc, 3c <foo+0x3c>
>   34:   01 00 00 00     nop
>   38:   9a 10 20 01     mov  1, %o5     ! 1 <foo+0x1>
>   3c:   81 c7 e0 08     ret
>   40:   91 eb 40 00     restore  %o5, %g0, %o0

all of the above is very useful info that can be in commit log.

> Last optimization tonight, I promise :-)
>
>  arch/sparc/net/bpf_jit_comp_64.c | 249 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 245 insertions(+), 4 deletions(-)
>
> diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
> index 2b2f3c3..6869ec0 100644
> --- a/arch/sparc/net/bpf_jit_comp_64.c
> +++ b/arch/sparc/net/bpf_jit_comp_64.c
> @@ -28,6 +28,11 @@ static inline bool is_simm5(unsigned int value)
>  	return value + 0x10 < 0x20;
>  }
>
> +static inline bool is_sethi(unsigned int value)
> +{
> +	return (value & ~0x3fffff) == 0;
> +}
> +
>  static void bpf_flush_icache(void *start_, void *end_)
>  {
>  	/* Cheetah's I-cache is fully coherent.  */
> @@ -367,16 +372,252 @@ static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
>  	}
>  }
>
> +static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
> +				   int *hbsp, int *lbsp, int *abbasp)
> +{
> +	int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
> +	int i;
> +
> +	lowest_bit_set = highest_bit_set = -1;
> +	i = 0;
> +	do {
> +		if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
> +			lowest_bit_set = i;
> +		if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
> +			highest_bit_set = (64 - i - 1);
> +	}  while (++i < 32 && ((highest_bit_set == -1) ||
> +			       (lowest_bit_set == -1)));

copy pasta from gcc ? ;)
the number of extra () looks excessive.

> +static bool const64_is_2insns (unsigned long high_bits,
> +			       unsigned long low_bits)

extra space before (

> +{
> +	int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
> +
> +	if (high_bits == 0 || high_bits == 0xffffffff)
> +		return true;
> +
> +	analyze_64bit_constant (high_bits, low_bits,

another space

> +				&highest_bit_set, &lowest_bit_set,
> +				&all_bits_between_are_set);
> +
> +	if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
> +	    all_bits_between_are_set != 0)
> +		return true;
> +
> +	if ((highest_bit_set - lowest_bit_set) < 21)
> +		return true;

extra ()

> +static void sparc_emit_set_const64_quick2 (unsigned long high_bits,

extra space

> +	/* Now a range of 22 or less bits set somewhere.
> +	 * 1) sethi	%hi(focus_bits), %reg
> +	 *    sllx	%reg, shift, %reg
> +	 * 2) sethi	%hi(focus_bits), %reg
> +	 *    srlx	%reg, shift, %reg
> +	 */

was thinking whether any of these optimizations can be generalized to
other JITs. Probably not. sethi semantic is too sparc specific.

> +	if (const64_is_2insns ((~high_bits) & 0xffffffff,
> +			       (~low_bits) & 0xfffffc00)) {

all gcc-ism actually gives confidence that most likely
it's all good and battle proven logic :)


^ permalink raw reply

* Re: [PATCH net] netvsc: fix calculation of available send sections
From: Greg Rose @ 2017-04-25  3:20 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev, Stephen Hemminger
In-Reply-To: <20170425013338.2653-1-sthemmin@microsoft.com>

On Mon, 2017-04-24 at 18:33 -0700, Stephen Hemminger wrote:
> My change (introduced in 4.11) to use find_first_clear_bit
> incorrectly assumed that the size argument was words, not bits.

Oops...

> The effect was only a small limited number of the available send
> sections were being actually used. This can cause performance loss
> with some workloads.
> 
> Since map_words is now used only during initialization, it can
> be on stack instead of in per-device data.
> 
> Fixes: b58a185801da ("netvsc: simplify get next send section")
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>

Looks good to me.

Reviewed by Greg Rose <gvrose8192@gmail.com>

> ---
>  drivers/net/hyperv/hyperv_net.h | 1 -
>  drivers/net/hyperv/netvsc.c     | 9 ++++-----
>  2 files changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index f9f3dba7a588..db23cb36ae5c 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -751,7 +751,6 @@ struct netvsc_device {
>  	u32 send_section_cnt;
>  	u32 send_section_size;
>  	unsigned long *send_section_map;
> -	int map_words;
>  
>  	/* Used for NetVSP initialization protocol */
>  	struct completion channel_init_wait;
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index 8dd0b8770328..15ef713d96c0 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -236,6 +236,7 @@ static int netvsc_init_buf(struct hv_device *device)
>  	struct netvsc_device *net_device;
>  	struct nvsp_message *init_packet;
>  	struct net_device *ndev;
> +	size_t map_words;
>  	int node;
>  
>  	net_device = get_outbound_net_device(device);
> @@ -401,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device)
>  		   net_device->send_section_size, net_device->send_section_cnt);
>  
>  	/* Setup state for managing the send buffer. */
> -	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
> -					     BITS_PER_LONG);
> +	map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
>  
> -	net_device->send_section_map = kcalloc(net_device->map_words,
> -					       sizeof(ulong), GFP_KERNEL);
> +	net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
>  	if (net_device->send_section_map == NULL) {
>  		ret = -ENOMEM;
>  		goto cleanup;
> @@ -683,7 +682,7 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
>  	unsigned long *map_addr = net_device->send_section_map;
>  	unsigned int i;
>  
> -	for_each_clear_bit(i, map_addr, net_device->map_words) {
> +	for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
>  		if (sync_test_and_set_bit(i, map_addr) == 0)
>  			return i;
>  	}

^ permalink raw reply

* Re: [PATCH net-next] sparc64: Improve 64-bit constant loading in eBPF JIT.
From: David Miller @ 2017-04-25  3:28 UTC (permalink / raw)
  To: ast; +Cc: netdev, sparclinux, daniel
In-Reply-To: <74973b5c-5380-b27b-3cf6-7b1444cade02@fb.com>

From: Alexei Starovoitov <ast@fb.com>
Date: Mon, 24 Apr 2017 20:18:56 -0700

> all gcc-ism actually gives confidence that most likely
> it's all good and battle proven logic :)

Yes, this is old code written 15 years ago :)

I'll fix up the spaces.

^ permalink raw reply

* [PATCH net-next v2] sparc64: Improve 64-bit constant loading in eBPF JIT.
From: David Miller @ 2017-04-25  3:38 UTC (permalink / raw)
  To: netdev; +Cc: sparclinux, ast, daniel


Doing a full 64-bit decomposition is really stupid especially for
simple values like 0 and -1.

But if we are going to optimize this, go all the way and try for all 2
and 3 instruction sequences not requiring a temporary register as
well.

First we do the easy cases where it's a zero or sign extended 32-bit
number (sethi+or, sethi+xor, respectively).

Then we try to find a range of set bits we can load simply then shift
up into place, in various ways.

Then we try negating the constant and see if we can do a simple
sequence using that with a xor at the end.  (f.e. the range of set
bits can't be loaded simply, but for the negated value it can)

The final optimized strategy involves 4 instructions sequences not
needing a temporary register.

Otherwise we sadly fully decompose using a temp..

Example, from ALU64_XOR_K: 0x0000ffffffff0000 ^ 0x0 = 0x0000ffffffff0000:

0000000000000000 <foo>:
   0:   9d e3 bf 50     save  %sp, -176, %sp
   4:   01 00 00 00     nop
   8:   90 10 00 18     mov  %i0, %o0
   c:   13 3f ff ff     sethi  %hi(0xfffffc00), %o1
  10:   92 12 63 ff     or  %o1, 0x3ff, %o1     ! ffffffff <foo+0xffffffff>
  14:   93 2a 70 10     sllx  %o1, 0x10, %o1
  18:   15 3f ff ff     sethi  %hi(0xfffffc00), %o2
  1c:   94 12 a3 ff     or  %o2, 0x3ff, %o2     ! ffffffff <foo+0xffffffff>
  20:   95 2a b0 10     sllx  %o2, 0x10, %o2
  24:   92 1a 60 00     xor  %o1, 0, %o1
  28:   12 e2 40 8a     cxbe  %o1, %o2, 38 <foo+0x38>
  2c:   9a 10 20 02     mov  2, %o5
  30:   10 60 00 03     b,pn   %xcc, 3c <foo+0x3c>
  34:   01 00 00 00     nop
  38:   9a 10 20 01     mov  1, %o5     ! 1 <foo+0x1>
  3c:   81 c7 e0 08     ret
  40:   91 eb 40 00     restore  %o5, %g0, %o0

Signed-off-by: David S. Miller <davem@davemloft.net>
---

v2:
 - fix spaces (Alexei)
 - put details into commit msg (Alexei)

 arch/sparc/net/bpf_jit_comp_64.c | 249 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 245 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 2b2f3c3..ec7d10d 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -28,6 +28,11 @@ static inline bool is_simm5(unsigned int value)
 	return value + 0x10 < 0x20;
 }
 
+static inline bool is_sethi(unsigned int value)
+{
+	return (value & ~0x3fffff) == 0;
+}
+
 static void bpf_flush_icache(void *start_, void *end_)
 {
 	/* Cheetah's I-cache is fully coherent.  */
@@ -367,16 +372,252 @@ static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
 	}
 }
 
+static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
+				   int *hbsp, int *lbsp, int *abbasp)
+{
+	int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
+	int i;
+
+	lowest_bit_set = highest_bit_set = -1;
+	i = 0;
+	do {
+		if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
+			lowest_bit_set = i;
+		if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
+			highest_bit_set = (64 - i - 1);
+	}  while (++i < 32 && (highest_bit_set == -1 ||
+			       lowest_bit_set == -1));
+	if (i == 32) {
+		i = 0;
+		do {
+			if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
+				lowest_bit_set = i + 32;
+			if (highest_bit_set == -1 &&
+			    ((low_bits >> (32 - i - 1)) & 1))
+				highest_bit_set = 32 - i - 1;
+		} while (++i < 32 && (highest_bit_set == -1 ||
+				      lowest_bit_set == -1));
+	}
+
+	all_bits_between_are_set = 1;
+	for (i = lowest_bit_set; i <= highest_bit_set; i++) {
+		if (i < 32) {
+			if ((low_bits & (1 << i)) != 0)
+				continue;
+		} else {
+			if ((high_bits & (1 << (i - 32))) != 0)
+				continue;
+		}
+		all_bits_between_are_set = 0;
+		break;
+	}
+	*hbsp = highest_bit_set;
+	*lbsp = lowest_bit_set;
+	*abbasp = all_bits_between_are_set;
+}
+
+static unsigned long create_simple_focus_bits(unsigned long high_bits,
+					      unsigned long low_bits,
+					      int lowest_bit_set, int shift)
+{
+	long hi, lo;
+
+	if (lowest_bit_set < 32) {
+		lo = (low_bits >> lowest_bit_set) << shift;
+		hi = ((high_bits << (32 - lowest_bit_set)) << shift);
+	} else {
+		lo = 0;
+		hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
+	}
+	return hi | lo;
+}
+
+static bool const64_is_2insns(unsigned long high_bits,
+			      unsigned long low_bits)
+{
+	int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
+
+	if (high_bits == 0 || high_bits == 0xffffffff)
+		return true;
+
+	analyze_64bit_constant(high_bits, low_bits,
+			       &highest_bit_set, &lowest_bit_set,
+			       &all_bits_between_are_set);
+
+	if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
+	    all_bits_between_are_set != 0)
+		return true;
+
+	if (highest_bit_set - lowest_bit_set < 21)
+		return true;
+
+	return false;
+}
+
+static void sparc_emit_set_const64_quick2(unsigned long high_bits,
+					  unsigned long low_imm,
+					  unsigned int dest,
+					  int shift_count, struct jit_ctx *ctx)
+{
+	emit_loadimm32(high_bits, dest, ctx);
+
+	/* Now shift it up into place.  */
+	emit_alu_K(SLLX, dest, shift_count, ctx);
+
+	/* If there is a low immediate part piece, finish up by
+	 * putting that in as well.
+	 */
+	if (low_imm != 0)
+		emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
+}
+
 static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
 {
+	int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
 	unsigned int tmp = bpf2sparc[TMP_REG_1];
-	u32 high_part = (K >> 32);
-	u32 low_part = (K & 0xffffffff);
+	u32 low_bits = (K & 0xffffffff);
+	u32 high_bits = (K >> 32);
+
+	/* These two tests also take care of all of the one
+	 * instruction cases.
+	 */
+	if (high_bits == 0xffffffff && (low_bits & 0x80000000))
+		return emit_loadimm_sext(K, dest, ctx);
+	if (high_bits == 0x00000000)
+		return emit_loadimm32(K, dest, ctx);
+
+	analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
+			       &lowest_bit_set, &all_bits_between_are_set);
+
+	/* 1) mov	-1, %reg
+	 *    sllx	%reg, shift, %reg
+	 * 2) mov	-1, %reg
+	 *    srlx	%reg, shift, %reg
+	 * 3) mov	some_small_const, %reg
+	 *    sllx	%reg, shift, %reg
+	 */
+	if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
+	     all_bits_between_are_set != 0) ||
+	    ((highest_bit_set - lowest_bit_set) < 12)) {
+		int shift = lowest_bit_set;
+		long the_const = -1;
+
+		if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
+		    all_bits_between_are_set == 0) {
+			the_const =
+				create_simple_focus_bits(high_bits, low_bits,
+							 lowest_bit_set, 0);
+		} else if (lowest_bit_set == 0)
+			shift = -(63 - highest_bit_set);
+
+		emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
+		if (shift > 0)
+			emit_alu_K(SLLX, dest, shift, ctx);
+		else if (shift < 0)
+			emit_alu_K(SRLX, dest, -shift, ctx);
+
+		return;
+	}
+
+	/* Now a range of 22 or less bits set somewhere.
+	 * 1) sethi	%hi(focus_bits), %reg
+	 *    sllx	%reg, shift, %reg
+	 * 2) sethi	%hi(focus_bits), %reg
+	 *    srlx	%reg, shift, %reg
+	 */
+	if ((highest_bit_set - lowest_bit_set) < 21) {
+		unsigned long focus_bits =
+			create_simple_focus_bits(high_bits, low_bits,
+						 lowest_bit_set, 10);
+
+		emit(SETHI(focus_bits, dest), ctx);
+
+		/* If lowest_bit_set == 10 then a sethi alone could
+		 * have done it.
+		 */
+		if (lowest_bit_set < 10)
+			emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
+		else if (lowest_bit_set > 10)
+			emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
+		return;
+	}
+
+	/* Ok, now 3 instruction sequences.  */
+	if (low_bits == 0) {
+		emit_loadimm32(high_bits, dest, ctx);
+		emit_alu_K(SLLX, dest, 32, ctx);
+		return;
+	}
+
+	/* We may be able to do something quick
+	 * when the constant is negated, so try that.
+	 */
+	if (const64_is_2insns((~high_bits) & 0xffffffff,
+			      (~low_bits) & 0xfffffc00)) {
+		/* NOTE: The trailing bits get XOR'd so we need the
+		 * non-negated bits, not the negated ones.
+		 */
+		unsigned long trailing_bits = low_bits & 0x3ff;
+
+		if ((((~high_bits) & 0xffffffff) == 0 &&
+		     ((~low_bits) & 0x80000000) == 0) ||
+		    (((~high_bits) & 0xffffffff) == 0xffffffff &&
+		     ((~low_bits) & 0x80000000) != 0)) {
+			unsigned long fast_int = (~low_bits & 0xffffffff);
+
+			if ((is_sethi(fast_int) &&
+			     (~high_bits & 0xffffffff) == 0)) {
+				emit(SETHI(fast_int, dest), ctx);
+			} else if (is_simm13(fast_int)) {
+				emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
+			} else {
+				emit_loadimm64(fast_int, dest, ctx);
+			}
+		} else {
+			u64 n = ((~low_bits) & 0xfffffc00) |
+				(((unsigned long)((~high_bits) & 0xffffffff))<<32);
+			emit_loadimm64(n, dest, ctx);
+		}
+
+		low_bits = -0x400 | trailing_bits;
+
+		emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
+		return;
+	}
+
+	/* 1) sethi	%hi(xxx), %reg
+	 *    or	%reg, %lo(xxx), %reg
+	 *    sllx	%reg, yyy, %reg
+	 */
+	if ((highest_bit_set - lowest_bit_set) < 32) {
+		unsigned long focus_bits =
+			create_simple_focus_bits(high_bits, low_bits,
+						 lowest_bit_set, 0);
+
+		/* So what we know is that the set bits straddle the
+		 * middle of the 64-bit word.
+		 */
+		sparc_emit_set_const64_quick2(focus_bits, 0, dest,
+					      lowest_bit_set, ctx);
+		return;
+	}
+
+	/* 1) sethi	%hi(high_bits), %reg
+	 *    or	%reg, %lo(high_bits), %reg
+	 *    sllx	%reg, 32, %reg
+	 *    or	%reg, low_bits, %reg
+	 */
+	if (is_simm13(low_bits) && ((int)low_bits > 0)) {
+		sparc_emit_set_const64_quick2(high_bits, low_bits,
+					      dest, 32, ctx);
+		return;
+	}
 
+	/* Oh well, we tried... Do a full 64-bit decomposition.  */
 	ctx->tmp_1_used = true;
 
-	emit_set_const(high_part, tmp, ctx);
-	emit_set_const(low_part, dest, ctx);
+	emit_loadimm32(high_bits, tmp, ctx);
+	emit_loadimm32(low_bits, dest, ctx);
 	emit_alu_K(SLLX, tmp, 32, ctx);
 	emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
 }
-- 
2.1.2.532.g19b5d50


^ permalink raw reply related

* Re: qed*: debug infrastructures
From: Jakub Kicinski @ 2017-04-25  3:44 UTC (permalink / raw)
  To: Elior, Ariel
  Cc: David Miller, netdev@vger.kernel.org, Mintz, Yuval, Tayar, Tomer,
	Dupuis, Chad
In-Reply-To: <CY1PR0701MB1337FA26F1503FC9928B0340901F0@CY1PR0701MB1337.namprd07.prod.outlook.com>

On Mon, 24 Apr 2017 17:38:57 +0000, Elior, Ariel wrote:
> Hi Dave,

Hi Ariel!

I'm not Dave but let me share my perspective :)

> According to the recent messages on the list indicating debugfs is not the way
> to go, I am looking for some guidance on what is. dpipe approach was
> mentioned as favorable, but I wanted to make sure molding our debug features to
> this infrastructure will result in something acceptable. A few points:
> 
> 1.
> One of our HW debug features is a signal recording feature. HW is configured to
> output specific signals, which are then continuously dumped into a cyclic
> buffer on host. There are ~8000 signals, which can be logically divided to ~100
> groups. I believe this can be modeled in dpipe (or similar tool) as a set of
> ~100 tables with ~80 entries each, and user would be able to see them all and
> choose what they like. The output data itself is binary, and meaningless to
> "the user". The amount of data is basically as large a contiguous buffer as
> driver can allocate, i.e. usually 4MB. When user selects the signals, and sets
> meta data regarding to mode of operations, some device configuration will have
> to take place. Does that sound reasonable?
> This debug feature already exists out of tree for bnx2x and qed* drivers and is
> *very* effective in field deployments. I would very much like to see this as an
> in-tree feature via some infrastructure or another.

Sorry for even mentioning it, new debug interfaces would be cool, but
for FW/HW state dumps which are meaningless to the user why not just
use ethtool get-dump/set-dump?  Do you really need the ability to
toggle those 8k signals one-by-one or are there reasonable sets you
could provide from the driver that you could encode on the available
32bits of flags?

What could be useful would be some form of start/stop commands for
debugging to tell the driver/FW when to record events selected by
set-dump and maybe a way for the user to discover what dumps the driver
can provide (a'la ethtool private flags).

> 2.
> Sometimes we want to debug the probe or removal flow of the driver. ethtool has
> the disadvantage of only being available once network device is available. If a
> bug stops the load flow before the ethtool debug paths are available, there is
> no way to collect a dump. Similarly, removal flows which hit a bug but do remove
> the network device, can't be debugged from ethtool. Does dpipe suffer from the
> same problem? qed* (like mlx*) has a common-functionality module. This allows
> creating debugfs nodes even before the network drivers are probed, providing a
> solution for this (debug nodes are also available after network driver removal).
> If dpipe does hold an answer here (e.g. provide preconfiguration which would
> kick in when network device registers) then we might want to port all of our
> register dump logic over there for this advantage. Does that sound reasonable?

Porting the debug/dump infrastructure to devlink would be very much
appreciated.  I'm not sure it would fit into dpipe or be a separate
command.

> 3.
> Yuval mentioned this, but I wanted to reiterate that the same is necessary for
> our storage drivers (qedi/qedf). debugfs does have the advantage of being non
> sub-system specific. Is there perhaps another non subsystem specific debug
> infrastructure which *is* acceptable to the networking subsystem? My guess is
> that the storage drivers will turn to debugfs (in their own subsystem).

devlink is not ethernet-specific, it should be a good fit for iSCSI and
FCOE drivers, too.

^ permalink raw reply

* [PATCH net 1/1] qed: Fix error in the dcbx app meta data initialization.
From: Sudarsana Reddy Kalluru @ 2017-04-25  3:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yuval.Mintz

DCBX app_data array is initialized with the incorrect values for
personality field. This would  prevent offloaded protocols from
honoring the PFC.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index a6e2bbe..cfdadb6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -64,11 +64,11 @@
 	((u32)(prio_tc_tbl >> ((7 - prio) * 4)) & 0x7)
 
 static const struct qed_dcbx_app_metadata qed_dcbx_app_update[] = {
-	{DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_DEFAULT},
-	{DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH}
+	{DCBX_PROTOCOL_ISCSI, "ISCSI", QED_PCI_ISCSI},
+	{DCBX_PROTOCOL_FCOE, "FCOE", QED_PCI_FCOE},
+	{DCBX_PROTOCOL_ROCE, "ROCE", QED_PCI_ETH_ROCE},
+	{DCBX_PROTOCOL_ROCE_V2, "ROCE_V2", QED_PCI_ETH_ROCE},
+	{DCBX_PROTOCOL_ETH, "ETH", QED_PCI_ETH},
 };
 
 static bool qed_dcbx_app_ethtype(u32 app_info_bitmap)
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH RFC] ptr_ring: add ptr_ring_unconsume
From: Jason Wang @ 2017-04-25  4:07 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-kernel, netdev
In-Reply-To: <20170424145632-mutt-send-email-mst@kernel.org>



On 2017年04月24日 20:00, Michael S. Tsirkin wrote:
> On Mon, Apr 24, 2017 at 07:54:18PM +0800, Jason Wang wrote:
>> On 2017年04月24日 07:28, Michael S. Tsirkin wrote:
>>> On Tue, Apr 18, 2017 at 11:07:42AM +0800, Jason Wang wrote:
>>>> On 2017年04月17日 07:19, Michael S. Tsirkin wrote:
>>>>> Applications that consume a batch of entries in one go
>>>>> can benefit from ability to return some of them back
>>>>> into the ring.
>>>>>
>>>>> Add an API for that - assuming there's space. If there's no space
>>>>> naturally we can't do this and have to drop entries, but this implies
>>>>> ring is full so we'd likely drop some anyway.
>>>>>
>>>>> Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
>>>>> ---
>>>>>
>>>>> Jason, in my mind the biggest issue with your batching patchset is the
>>>>> backet drops on disconnect.  This API will help avoid that in the common
>>>>> case.
>>>> Ok, I will rebase the series on top of this. (Though I don't think we care
>>>> the packet loss).
>>> E.g. I care - I often start sending packets to VM before it's
>>> fully booted. Several vhost resets might follow.
>> Ok.
>>
>>>>> I would still prefer that we understand what's going on,
>>>> I try to reply in another thread, does it make sense?
>>>>
>>>>>     and I would
>>>>> like to know what's the smallest batch size that's still helpful,
>>>> Yes, I've replied in another thread, the result is:
>>>>
>>>>
>>>> no batching   1.88Mpps
>>>> RX_BATCH=1    1.93Mpps
>>>> RX_BATCH=4    2.11Mpps
>>>> RX_BATCH=16   2.14Mpps
>>>> RX_BATCH=64   2.25Mpps
>>>> RX_BATCH=256  2.18Mpps
>>> Essentially 4 is enough, other stuf looks more like noise
>>> to me. What about 2?
>> The numbers are pretty stable, so probably not noise. Retested on top of
>> batch zeroing:
>>
>> no  1.97Mpps
>> 1   2.09Mpps
>> 2   2.11Mpps
>> 4   2.16Mpps
>> 8   2.19Mpps
>> 16  2.21Mpps
>> 32  2.25Mpps
>> 64  2.30Mpps
>> 128 2.21Mpps
>> 256 2.21Mpps
>>
>> 64 performs best.
>>
>> Thanks
> OK but it might be e.g. a function of the ring size, host cache size or
> whatever. As we don't really understand the why, if we just optimize for
> your setup we risk regressions in others.  64 entries is a lot, it
> increases the queue size noticeably.  Could this be part of the effect?
> Could you try changing the queue size to see what happens?

I increase tx_queue_len to 1100, but only see less than 1% improvement 
on pps number (batch = 1) in my machine. If you care about the 
regression, we probably can leave the choice to user through e.g module 
parameter. But I'm afraid we have already had too much choices for them. 
Or I can test this with different CPU types.

Thanks

>

^ permalink raw reply

* Re: net/ipv6: slab-out-of-bounds in ip6_tnl_xmit
From: Cong Wang @ 2017-04-25  5:04 UTC (permalink / raw)
  To: Andrey Konovalov
  Cc: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet,
	Dmitry Vyukov, Kostya Serebryany, syzkaller
In-Reply-To: <CAM_iQpXLHx=NhSNYU_o4F_V6nNXUHGBSdV=QQnML0B+HD7MbBQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 889 bytes --]

On Mon, Apr 24, 2017 at 9:47 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>
> We use ipv4 dst in ip6_tunnel and cast an IPv4 neigh key as an
> IPv6 address...
>
>
>                 neigh = dst_neigh_lookup(skb_dst(skb),
>                                          &ipv6_hdr(skb)->daddr);
>                 if (!neigh)
>                         goto tx_err_link_failure;
>
>                 addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE
>                 addr_type = ipv6_addr_type(addr6);
>
>                 if (addr_type == IPV6_ADDR_ANY)
>                         addr6 = &ipv6_hdr(skb)->daddr;
>
>                 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
>
> Also the network header of the skb at this point should be still IPv4?

Please try the attached patch.

I am not sure how we could handle 4in6 case better than just relying on
the config of ip6 tunnel.

[-- Attachment #2: ip6_tunnel.diff --]
[-- Type: text/plain, Size: 1888 bytes --]

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 75fac93..a9692ec 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1037,7 +1037,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net *net = t->net;
 	struct net_device_stats *stats = &t->dev->stats;
-	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct ipv6hdr *ipv6h;
 	struct ipv6_tel_txoption opt;
 	struct dst_entry *dst = NULL, *ndst = NULL;
 	struct net_device *tdev;
@@ -1057,26 +1057,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
 	/* NBMA tunnel */
 	if (ipv6_addr_any(&t->parms.raddr)) {
-		struct in6_addr *addr6;
-		struct neighbour *neigh;
-		int addr_type;
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			struct in6_addr *addr6;
+			struct neighbour *neigh;
+			int addr_type;
 
-		if (!skb_dst(skb))
-			goto tx_err_link_failure;
+			if (!skb_dst(skb))
+				goto tx_err_link_failure;
 
-		neigh = dst_neigh_lookup(skb_dst(skb),
-					 &ipv6_hdr(skb)->daddr);
-		if (!neigh)
-			goto tx_err_link_failure;
+			neigh = dst_neigh_lookup(skb_dst(skb),
+						 &ipv6_hdr(skb)->daddr);
+			if (!neigh)
+				goto tx_err_link_failure;
 
-		addr6 = (struct in6_addr *)&neigh->primary_key;
-		addr_type = ipv6_addr_type(addr6);
+			addr6 = (struct in6_addr *)&neigh->primary_key;
+			addr_type = ipv6_addr_type(addr6);
 
-		if (addr_type == IPV6_ADDR_ANY)
-			addr6 = &ipv6_hdr(skb)->daddr;
+			if (addr_type == IPV6_ADDR_ANY)
+				addr6 = &ipv6_hdr(skb)->daddr;
 
-		memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
-		neigh_release(neigh);
+			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+			neigh_release(neigh);
+		}
 	} else if (!(t->parms.flags &
 		     (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
 		/* enable the cache only only if the routing decision does

^ permalink raw reply related

* Re: [PATCH] stmmac: Add support for SIMATIC IOT2000 platform
From: Jan Kiszka @ 2017-04-25  5:44 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Giuseppe Cavallaro, Alexandre Torgue, netdev,
	Linux Kernel Mailing List, Sascha Weisenberger
In-Reply-To: <CAHp75VcdpOXa+aQ6-iKamTAbX9KGZOWpukygSUS7Ef_E3UNvuw@mail.gmail.com>

On 2017-04-24 23:27, Andy Shevchenko wrote:
> On Mon, Apr 24, 2017 at 10:27 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> The IOT2000 is industrial controller platform, derived from the Intel
>> Galileo Gen2 board. The variant IOT2020 comes with one LAN port, the
>> IOT2040 has two of them. They can be told apart based on the board asset
>> tag in the DMI table.
>>
>> Based on patch by Sascha Weisenberger.
>>
> 
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> Signed-off-by: Sascha Weisenberger <sascha.weisenberger@siemens.com>
> 
> Shoudn't be ordered other way around?

Nope. My changes invalidated Sascha's signed-off on the original patch,
but he signed off again on the final version.

> 
>> +       const char *asset_tag;
> 
> I guess this is redundant. See below.
> 
>> +       {
>> +               .name = "SIMATIC IOT2000",
>> +               .asset_tag = "6ES7647-0AA00-0YA2",
>> +               .func = 6,
>> +               .phy_addr = 1,
>> +       },
> 
> The below has same definition disregard on asset_tag.
> 

There is a small difference in the asset tag, just not at the last digit
where one may expect it, look:

...-0YA2 -> IOT2020
...-1YA2 -> IOT2040

>> +       {
>> +               .name = "SIMATIC IOT2000",
>> +               .asset_tag = "6ES7647-0AA00-1YA2",
>> +               .func = 6,
>> +               .phy_addr = 1,
>> +       },
>> +       {
>> +               .name = "SIMATIC IOT2000",
>> +               .asset_tag = "6ES7647-0AA00-1YA2",
>> +               .func = 7,
>> +               .phy_addr = 1,
>> +       },
> 
> How this supposed to work if phy_addr is the same?
> 

That address space is MAC-local, and we have two different MACs here.

Jan

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux

^ permalink raw reply

* [PATCH 0/8] Fix and complete CAN namespace support
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev

Hello Dave,

unfortunately the initial network namespace support by Mario Kicherer
(8e8cda6d737d) slipped into net-next without further review and Marc pushed
the code without my Acked-by. Due to the fact that this code was in net-next
now I spent some nights to fix, clean up, finalize and test the missing pieces
for the namespace support for the CAN subsystem in net/can.

As Marc is currently *VERY* unresponsive on the mailing list due to his 'real'
job I send this patch set directly to you to make sure it gets through the
current merge window. We are already in -rc8 and I would like to avoid to push
an incomplete functionality - that has to be fixed - to Linus.

This patch set is based on the latest net-next.

Thanks,
Oliver

Oliver Hartkopp (8):
  can: fix memory leak in initial namespace support
  can: remove obsolete pernet_operations definitions
  can: remove obsolete definitions
  can: complete initial namespace support
  can: network namespace support for CAN_BCM protocol
  can: network namespace support for CAN gateway
  can: add Virtual CAN Tunnel driver (vxcan)
  can: enable module auto loading for virtual CAN interfaces

 drivers/net/can/Kconfig        |  18 +++
 drivers/net/can/Makefile       |   1 +
 drivers/net/can/vcan.c         |   7 +-
 drivers/net/can/vxcan.c        | 316 +++++++++++++++++++++++++++++++++++++++++
 include/linux/can/core.h       |   4 +-
 include/net/netns/can.h        |   9 ++
 include/uapi/linux/can/vxcan.h |  12 ++
 net/can/af_can.c               |  77 +++++-----
 net/can/af_can.h               |   9 --
 net/can/bcm.c                  |  90 +++++++-----
 net/can/gw.c                   |  72 ++++++----
 net/can/proc.c                 | 141 +++++++++---------
 12 files changed, 580 insertions(+), 176 deletions(-)
 create mode 100644 drivers/net/can/vxcan.c
 create mode 100644 include/uapi/linux/can/vxcan.h

-- 
2.11.0


^ permalink raw reply

* [PATCH 1/8] can: fix memory leak in initial namespace support
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

The can_rx_alldev_list is a per-net data structure now and allocated in
can_pernet_init(). Make sure the memory is free'd in can_pernet_exit() too.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 net/can/af_can.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/can/af_can.c b/net/can/af_can.c
index abf7d854a94d..2c935babe466 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -903,6 +903,8 @@ static void can_pernet_exit(struct net *net)
 		}
 	}
 	rcu_read_unlock();
+
+	kfree(net->can.can_rx_alldev_list);
 }
 
 /*
-- 
2.11.0


^ permalink raw reply related

* [PATCH 2/8] can: remove obsolete pernet_operations definitions
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

The namespace support for the CAN subsystem does not need any additional
memory. So when ".size = 0" there's no extra memory allocated by the system.
And therefore ".id" is obsolete too.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 net/can/af_can.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/can/af_can.c b/net/can/af_can.c
index 2c935babe466..421b60fc42c3 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -75,8 +75,6 @@ static int stats_timer __read_mostly = 1;
 module_param(stats_timer, int, S_IRUGO);
 MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
 
-static int can_net_id;
-
 static struct kmem_cache *rcv_cache __read_mostly;
 
 /* table of registered CAN protocols */
@@ -935,8 +933,6 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
 static struct pernet_operations can_pernet_ops __read_mostly = {
 	.init = can_pernet_init,
 	.exit = can_pernet_exit,
-	.id = &can_net_id,
-	.size = 0,
 };
 
 static __init int can_init(void)
-- 
2.11.0


^ permalink raw reply related

* [PATCH 3/8] can: remove obsolete definitions
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

can_rx_alldev_list is a per-net data structure now. Remove it's definition
here and can_rx_dev_list too.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 net/can/af_can.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/can/af_can.h b/net/can/af_can.h
index f273c9d9b129..84a35e97c5e0 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -110,9 +110,6 @@ struct s_pstats {
 	unsigned long rcv_entries_max;
 };
 
-/* receive filters subscribed for 'all' CAN devices */
-extern struct dev_rcv_lists can_rx_alldev_list;
-
 /* function prototypes for the CAN networklayer procfs (proc.c) */
 void can_init_proc(struct net *net);
 void can_remove_proc(struct net *net);
@@ -122,6 +119,5 @@ void can_stat_update(unsigned long data);
 extern struct timer_list can_stattimer;    /* timer for statistics update */
 extern struct s_stats    can_stats;        /* packet statistics */
 extern struct s_pstats   can_pstats;       /* receive list statistics */
-extern struct hlist_head can_rx_dev_list;  /* rx dispatcher structures */
 
 #endif /* AF_CAN_H */
-- 
2.11.0


^ permalink raw reply related

* [PATCH 6/8] can: network namespace support for CAN gateway
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

The CAN gateway was not implemented as per-net in the initial network
namespace support by Mario Kicherer (8e8cda6d737d).
This patch enables the CAN gateway to be used in different namespaces.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 include/net/netns/can.h |  3 +++
 net/can/gw.c            | 72 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index 0f3c31aab8a8..b106e6ae2e5b 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -32,6 +32,9 @@ struct netns_can {
 	struct timer_list can_stattimer;/* timer for statistics update */
 	struct s_stats *can_stats;	/* packet statistics */
 	struct s_pstats *can_pstats;	/* receive list statistics */
+
+	/* CAN GW per-net gateway jobs */
+	struct hlist_head cgw_list;
 };
 
 #endif /* __NETNS_CAN_H__ */
diff --git a/net/can/gw.c b/net/can/gw.c
index ad5bf5d508d3..29748d844c3f 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,7 +1,7 @@
 /*
  * gw.c - CAN frame Gateway/Router/Bridge with netlink interface
  *
- * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * Copyright (c) 2017 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
-#define CAN_GW_VERSION "20130117"
+#define CAN_GW_VERSION "20170425"
 #define CAN_GW_NAME "can-gw"
 
 MODULE_DESCRIPTION("PF_CAN netlink gateway");
@@ -79,9 +79,7 @@ MODULE_PARM_DESC(max_hops,
 		 __stringify(CGW_MAX_HOPS) " hops, "
 		 "default: " __stringify(CGW_DEFAULT_HOPS) ")");
 
-static HLIST_HEAD(cgw_list);
 static struct notifier_block notifier;
-
 static struct kmem_cache *cgw_cache __read_mostly;
 
 /* structure that contains the (on-the-fly) CAN frame modifications */
@@ -438,16 +436,16 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 		gwj->handled_frames++;
 }
 
-static inline int cgw_register_filter(struct cgw_job *gwj)
+static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj)
 {
-	return can_rx_register(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id,
+	return can_rx_register(net, gwj->src.dev, gwj->ccgw.filter.can_id,
 			       gwj->ccgw.filter.can_mask, can_can_gw_rcv,
 			       gwj, "gw", NULL);
 }
 
-static inline void cgw_unregister_filter(struct cgw_job *gwj)
+static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj)
 {
-	can_rx_unregister(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id,
+	can_rx_unregister(net, gwj->src.dev, gwj->ccgw.filter.can_id,
 			  gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
 }
 
@@ -455,9 +453,8 @@ static int cgw_notifier(struct notifier_block *nb,
 			unsigned long msg, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
 
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
 	if (dev->type != ARPHRD_CAN)
 		return NOTIFY_DONE;
 
@@ -468,11 +465,11 @@ static int cgw_notifier(struct notifier_block *nb,
 
 		ASSERT_RTNL();
 
-		hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+		hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
 
 			if (gwj->src.dev == dev || gwj->dst.dev == dev) {
 				hlist_del(&gwj->list);
-				cgw_unregister_filter(gwj);
+				cgw_unregister_filter(net, gwj);
 				kmem_cache_free(cgw_cache, gwj);
 			}
 		}
@@ -592,12 +589,13 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 /* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */
 static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = sock_net(skb->sk);
 	struct cgw_job *gwj = NULL;
 	int idx = 0;
 	int s_idx = cb->args[0];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(gwj, &cgw_list, list) {
+	hlist_for_each_entry_rcu(gwj, &net->can.cgw_list, list) {
 		if (idx < s_idx)
 			goto cont;
 
@@ -812,6 +810,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
+	struct net *net = sock_net(skb->sk);
 	struct rtcanmsg *r;
 	struct cgw_job *gwj;
 	struct cf_mod mod;
@@ -842,7 +841,7 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 		ASSERT_RTNL();
 
 		/* check for updating an existing job with identical uid */
-		hlist_for_each_entry(gwj, &cgw_list, list) {
+		hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
 
 			if (gwj->mod.uid != mod.uid)
 				continue;
@@ -880,7 +879,7 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 
 	err = -ENODEV;
 
-	gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx);
+	gwj->src.dev = __dev_get_by_index(net, gwj->ccgw.src_idx);
 
 	if (!gwj->src.dev)
 		goto out;
@@ -888,7 +887,7 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 	if (gwj->src.dev->type != ARPHRD_CAN)
 		goto out;
 
-	gwj->dst.dev = __dev_get_by_index(&init_net, gwj->ccgw.dst_idx);
+	gwj->dst.dev = __dev_get_by_index(net, gwj->ccgw.dst_idx);
 
 	if (!gwj->dst.dev)
 		goto out;
@@ -898,9 +897,9 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 
 	ASSERT_RTNL();
 
-	err = cgw_register_filter(gwj);
+	err = cgw_register_filter(net, gwj);
 	if (!err)
-		hlist_add_head_rcu(&gwj->list, &cgw_list);
+		hlist_add_head_rcu(&gwj->list, &net->can.cgw_list);
 out:
 	if (err)
 		kmem_cache_free(cgw_cache, gwj);
@@ -908,16 +907,16 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 	return err;
 }
 
-static void cgw_remove_all_jobs(void)
+static void cgw_remove_all_jobs(struct net *net)
 {
 	struct cgw_job *gwj = NULL;
 	struct hlist_node *nx;
 
 	ASSERT_RTNL();
 
-	hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+	hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
 		hlist_del(&gwj->list);
-		cgw_unregister_filter(gwj);
+		cgw_unregister_filter(net, gwj);
 		kmem_cache_free(cgw_cache, gwj);
 	}
 }
@@ -925,6 +924,7 @@ static void cgw_remove_all_jobs(void)
 static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
 			  struct netlink_ext_ack *extack)
 {
+	struct net *net = sock_net(skb->sk);
 	struct cgw_job *gwj = NULL;
 	struct hlist_node *nx;
 	struct rtcanmsg *r;
@@ -953,7 +953,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	/* two interface indices both set to 0 => remove all entries */
 	if (!ccgw.src_idx && !ccgw.dst_idx) {
-		cgw_remove_all_jobs();
+		cgw_remove_all_jobs(net);
 		return 0;
 	}
 
@@ -962,7 +962,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
 	ASSERT_RTNL();
 
 	/* remove only the first matching entry */
-	hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
+	hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
 
 		if (gwj->flags != r->flags)
 			continue;
@@ -985,7 +985,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
 			continue;
 
 		hlist_del(&gwj->list);
-		cgw_unregister_filter(gwj);
+		cgw_unregister_filter(net, gwj);
 		kmem_cache_free(cgw_cache, gwj);
 		err = 0;
 		break;
@@ -994,6 +994,24 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return err;
 }
 
+static int __net_init cangw_pernet_init(struct net *net)
+{
+	INIT_HLIST_HEAD(&net->can.cgw_list);
+	return 0;
+}
+
+static void __net_exit cangw_pernet_exit(struct net *net)
+{
+	rtnl_lock();
+	cgw_remove_all_jobs(net);
+	rtnl_unlock();
+}
+
+static struct pernet_operations cangw_pernet_ops = {
+	.init = cangw_pernet_init,
+	.exit = cangw_pernet_exit,
+};
+
 static __init int cgw_module_init(void)
 {
 	/* sanitize given module parameter */
@@ -1002,6 +1020,7 @@ static __init int cgw_module_init(void)
 	pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
 		max_hops);
 
+	register_pernet_subsys(&cangw_pernet_ops);
 	cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
 				      0, 0, NULL);
 
@@ -1031,10 +1050,7 @@ static __exit void cgw_module_exit(void)
 
 	unregister_netdevice_notifier(&notifier);
 
-	rtnl_lock();
-	cgw_remove_all_jobs();
-	rtnl_unlock();
-
+	unregister_pernet_subsys(&cangw_pernet_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
 	kmem_cache_destroy(cgw_cache);
-- 
2.11.0


^ permalink raw reply related

* [PATCH 7/8] can: add Virtual CAN Tunnel driver (vxcan)
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

Similar to the virtual ethernet driver veth, vxcan implements a
local CAN traffic tunnel between two virtual CAN network devices.
See Kconfig entry for details.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 drivers/net/can/Kconfig        |  18 +++
 drivers/net/can/Makefile       |   1 +
 drivers/net/can/vxcan.c        | 316 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/can/vxcan.h |  12 ++
 4 files changed, 347 insertions(+)
 create mode 100644 drivers/net/can/vxcan.c
 create mode 100644 include/uapi/linux/can/vxcan.h

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 22570ea3a8d2..61c2fcf7f880 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -9,6 +9,24 @@ config CAN_VCAN
 	  This driver can also be built as a module.  If so, the module
 	  will be called vcan.
 
+config CAN_VXCAN
+	tristate "Virtual CAN Tunnel (vxcan)"
+	---help---
+	  Similar to the virtual ethernet driver veth, vxcan implements a
+	  local CAN traffic tunnel between two virtual CAN network devices.
+	  When creating a vxcan, two vxcan devices are created as pair.
+	  When one end receives the packet it appears on its pair and vice
+	  versa. The vxcan can be used for cross namespace communication.
+
+	  In opposite to vcan loopback devices the vxcan only forwards CAN
+	  frames to its pair and does *not* provide a local echo of sent
+	  CAN frames. To disable a potential echo in af_can.c the vxcan driver
+	  announces IFF_ECHO in the interface flags. To have a clean start
+	  in each namespace the CAN GW hop counter is set to zero.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called vxcan.
+
 config CAN_SLCAN
 	tristate "Serial / USB serial CAN Adaptors (slcan)"
 	depends on TTY
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 0da4f2f5c7e3..18cc6543b711 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_CAN_VCAN)		+= vcan.o
+obj-$(CONFIG_CAN_VXCAN)		+= vxcan.o
 obj-$(CONFIG_CAN_SLCAN)		+= slcan.o
 
 obj-$(CONFIG_CAN_DEV)		+= can-dev.o
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
new file mode 100644
index 000000000000..7fbb24795681
--- /dev/null
+++ b/drivers/net/can/vxcan.c
@@ -0,0 +1,316 @@
+/*
+ * vxcan.c - Virtual CAN Tunnel for cross namespace communication
+ *
+ * This code is derived from drivers/net/can/vcan.c for the virtual CAN
+ * specific parts and from drivers/net/veth.c to implement the netlink API
+ * for network interface pairs in a common and established way.
+ *
+ * Copyright (c) 2017 Oliver Hartkopp <socketcan@hartkopp.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/skb.h>
+#include <linux/can/vxcan.h>
+#include <linux/slab.h>
+#include <net/rtnetlink.h>
+
+#define DRV_NAME "vxcan"
+
+MODULE_DESCRIPTION("Virtual CAN Tunnel");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
+
+struct vxcan_priv {
+	struct net_device __rcu	*peer;
+};
+
+static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vxcan_priv *priv = netdev_priv(dev);
+	struct net_device *peer;
+	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+	struct net_device_stats *peerstats, *srcstats = &dev->stats;
+
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
+	rcu_read_lock();
+	peer = rcu_dereference(priv->peer);
+	if (unlikely(!peer)) {
+		kfree_skb(skb);
+		dev->stats.tx_dropped++;
+		goto out_unlock;
+	}
+
+	skb = can_create_echo_skb(skb);
+	if (!skb)
+		goto out_unlock;
+
+	/* reset CAN GW hop counter */
+	skb->csum_start = 0;
+	skb->pkt_type   = PACKET_BROADCAST;
+	skb->dev        = peer;
+	skb->ip_summed  = CHECKSUM_UNNECESSARY;
+
+	if (netif_rx_ni(skb) == NET_RX_SUCCESS) {
+		srcstats->tx_packets++;
+		srcstats->tx_bytes += cfd->len;
+		peerstats = &peer->stats;
+		peerstats->rx_packets++;
+		peerstats->rx_bytes += cfd->len;
+	}
+
+out_unlock:
+	rcu_read_unlock();
+	return NETDEV_TX_OK;
+}
+
+
+static int vxcan_open(struct net_device *dev)
+{
+	struct vxcan_priv *priv = netdev_priv(dev);
+	struct net_device *peer = rtnl_dereference(priv->peer);
+
+	if (!peer)
+		return -ENOTCONN;
+
+	if (peer->flags & IFF_UP) {
+		netif_carrier_on(dev);
+		netif_carrier_on(peer);
+	}
+	return 0;
+}
+
+static int vxcan_close(struct net_device *dev)
+{
+	struct vxcan_priv *priv = netdev_priv(dev);
+	struct net_device *peer = rtnl_dereference(priv->peer);
+
+	netif_carrier_off(dev);
+	if (peer)
+		netif_carrier_off(peer);
+
+	return 0;
+}
+
+static int vxcan_get_iflink(const struct net_device *dev)
+{
+	struct vxcan_priv *priv = netdev_priv(dev);
+	struct net_device *peer;
+	int iflink;
+
+	rcu_read_lock();
+	peer = rcu_dereference(priv->peer);
+	iflink = peer ? peer->ifindex : 0;
+	rcu_read_unlock();
+
+	return iflink;
+}
+
+static int vxcan_change_mtu(struct net_device *dev, int new_mtu)
+{
+	/* Do not allow changing the MTU while running */
+	if (dev->flags & IFF_UP)
+		return -EBUSY;
+
+	if (new_mtu != CAN_MTU && new_mtu != CANFD_MTU)
+		return -EINVAL;
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static const struct net_device_ops vxcan_netdev_ops = {
+	.ndo_open	= vxcan_open,
+	.ndo_stop	= vxcan_close,
+	.ndo_start_xmit	= vxcan_xmit,
+	.ndo_get_iflink	= vxcan_get_iflink,
+	.ndo_change_mtu = vxcan_change_mtu,
+};
+
+static void vxcan_setup(struct net_device *dev)
+{
+	dev->type		= ARPHRD_CAN;
+	dev->mtu		= CAN_MTU;
+	dev->hard_header_len	= 0;
+	dev->addr_len		= 0;
+	dev->tx_queue_len	= 0;
+	dev->flags		= (IFF_NOARP|IFF_ECHO);
+	dev->netdev_ops		= &vxcan_netdev_ops;
+	dev->destructor		= free_netdev;
+}
+
+/* forward declaration for rtnl_create_link() */
+static struct rtnl_link_ops vxcan_link_ops;
+
+static int vxcan_newlink(struct net *net, struct net_device *dev,
+			 struct nlattr *tb[], struct nlattr *data[])
+{
+	struct vxcan_priv *priv;
+	struct net_device *peer;
+	struct net *peer_net;
+
+	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb;
+	char ifname[IFNAMSIZ];
+	unsigned char name_assign_type;
+	struct ifinfomsg *ifmp = NULL;
+	int err;
+
+	/* register peer device */
+	if (data && data[VXCAN_INFO_PEER]) {
+		struct nlattr *nla_peer;
+
+		nla_peer = data[VXCAN_INFO_PEER];
+		ifmp = nla_data(nla_peer);
+		err = rtnl_nla_parse_ifla(peer_tb,
+					  nla_data(nla_peer) +
+					  sizeof(struct ifinfomsg),
+					  nla_len(nla_peer) -
+					  sizeof(struct ifinfomsg),
+					  NULL);
+		if (err < 0)
+			return err;
+
+		tbp = peer_tb;
+	}
+
+	if (tbp[IFLA_IFNAME]) {
+		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
+		name_assign_type = NET_NAME_USER;
+	} else {
+		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
+		name_assign_type = NET_NAME_ENUM;
+	}
+
+	peer_net = rtnl_link_get_net(net, tbp);
+	if (IS_ERR(peer_net))
+		return PTR_ERR(peer_net);
+
+	peer = rtnl_create_link(peer_net, ifname, name_assign_type,
+				&vxcan_link_ops, tbp);
+	if (IS_ERR(peer)) {
+		put_net(peer_net);
+		return PTR_ERR(peer);
+	}
+
+	if (ifmp && dev->ifindex)
+		peer->ifindex = ifmp->ifi_index;
+
+	err = register_netdevice(peer);
+	put_net(peer_net);
+	peer_net = NULL;
+	if (err < 0) {
+		free_netdev(peer);
+		return err;
+	}
+
+	netif_carrier_off(peer);
+
+	err = rtnl_configure_link(peer, ifmp);
+	if (err < 0) {
+		unregister_netdevice(peer);
+		return err;
+	}
+
+	/* register first device */
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
+
+	err = register_netdevice(dev);
+	if (err < 0) {
+		unregister_netdevice(peer);
+		return err;
+	}
+
+	netif_carrier_off(dev);
+
+	/* cross link the device pair */
+	priv = netdev_priv(dev);
+	rcu_assign_pointer(priv->peer, peer);
+
+	priv = netdev_priv(peer);
+	rcu_assign_pointer(priv->peer, dev);
+
+	return 0;
+}
+
+static void vxcan_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct vxcan_priv *priv;
+	struct net_device *peer;
+
+	priv = netdev_priv(dev);
+	peer = rtnl_dereference(priv->peer);
+
+	/* Note : dellink() is called from default_device_exit_batch(),
+	 * before a rcu_synchronize() point. The devices are guaranteed
+	 * not being freed before one RCU grace period.
+	 */
+	RCU_INIT_POINTER(priv->peer, NULL);
+	unregister_netdevice_queue(dev, head);
+
+	if (peer) {
+		priv = netdev_priv(peer);
+		RCU_INIT_POINTER(priv->peer, NULL);
+		unregister_netdevice_queue(peer, head);
+	}
+}
+
+static const struct nla_policy vxcan_policy[VXCAN_INFO_MAX + 1] = {
+	[VXCAN_INFO_PEER] = { .len = sizeof(struct ifinfomsg) },
+};
+
+static struct net *vxcan_get_link_net(const struct net_device *dev)
+{
+	struct vxcan_priv *priv = netdev_priv(dev);
+	struct net_device *peer = rtnl_dereference(priv->peer);
+
+	return peer ? dev_net(peer) : dev_net(dev);
+}
+
+static struct rtnl_link_ops vxcan_link_ops = {
+	.kind		= DRV_NAME,
+	.priv_size	= sizeof(struct vxcan_priv),
+	.setup		= vxcan_setup,
+	.newlink	= vxcan_newlink,
+	.dellink	= vxcan_dellink,
+	.policy		= vxcan_policy,
+	.maxtype	= VXCAN_INFO_MAX,
+	.get_link_net	= vxcan_get_link_net,
+};
+
+static __init int vxcan_init(void)
+{
+	pr_info("vxcan: Virtual CAN Tunnel driver\n");
+
+	return rtnl_link_register(&vxcan_link_ops);
+}
+
+static __exit void vxcan_exit(void)
+{
+	rtnl_link_unregister(&vxcan_link_ops);
+}
+
+module_init(vxcan_init);
+module_exit(vxcan_exit);
diff --git a/include/uapi/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h
new file mode 100644
index 000000000000..ffb0b7156f7e
--- /dev/null
+++ b/include/uapi/linux/can/vxcan.h
@@ -0,0 +1,12 @@
+#ifndef _UAPI_CAN_VXCAN_H
+#define _UAPI_CAN_VXCAN_H
+
+enum {
+	VXCAN_INFO_UNSPEC,
+	VXCAN_INFO_PEER,
+
+	__VXCAN_INFO_MAX
+#define VXCAN_INFO_MAX	(__VXCAN_INFO_MAX - 1)
+};
+
+#endif
-- 
2.11.0


^ permalink raw reply related

* [PATCH 8/8] can: enable module auto loading for virtual CAN interfaces
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

Autoload the vcan module when a vcan instance is to be created by
'ip link add type vcan'

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 drivers/net/can/vcan.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index 674f367087c5..facca33d53e9 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -1,7 +1,7 @@
 /*
  * vcan.c - Virtual CAN interface
  *
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -50,9 +50,12 @@
 #include <linux/slab.h>
 #include <net/rtnetlink.h>
 
+#define DRV_NAME "vcan"
+
 MODULE_DESCRIPTION("virtual CAN interface");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
 
 
 /*
@@ -164,7 +167,7 @@ static void vcan_setup(struct net_device *dev)
 }
 
 static struct rtnl_link_ops vcan_link_ops __read_mostly = {
-	.kind	= "vcan",
+	.kind	= DRV_NAME,
 	.setup	= vcan_setup,
 };
 
-- 
2.11.0


^ permalink raw reply related

* [PATCH 4/8] can: complete initial namespace support
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

The statistics and its proc output was not implemented as per-net in the
initial network namespace support by Mario Kicherer (8e8cda6d737d).
This patch adds the missing per-net statistics for the CAN subsystem.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 include/linux/can/core.h |   4 +-
 include/net/netns/can.h  |   5 ++
 net/can/af_can.c         |  71 +++++++++++++-----------
 net/can/af_can.h         |   5 --
 net/can/proc.c           | 141 +++++++++++++++++++++++++----------------------
 5 files changed, 121 insertions(+), 105 deletions(-)

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 319a0da827b8..c9a17bb1221c 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -5,7 +5,7 @@
  *
  * Authors: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
  *          Urs Thuermann   <urs.thuermann@volkswagen.de>
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  */
@@ -17,7 +17,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
-#define CAN_VERSION "20120528"
+#define CAN_VERSION "20170425"
 
 /* increment this number each time you change some user-space interface */
 #define CAN_ABI_VERSION "9"
diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index e8beba772f1a..574157dbc43a 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -8,6 +8,8 @@
 #include <linux/spinlock.h>
 
 struct dev_rcv_lists;
+struct s_stats;
+struct s_pstats;
 
 struct netns_can {
 #if IS_ENABLED(CONFIG_PROC_FS)
@@ -26,6 +28,9 @@ struct netns_can {
 	/* receive filters subscribed for 'all' CAN devices */
 	struct dev_rcv_lists *can_rx_alldev_list;
 	spinlock_t can_rcvlists_lock;
+	struct timer_list can_stattimer;/* timer for statistics update */
+	struct s_stats *can_stats;	/* packet statistics */
+	struct s_pstats *can_pstats;	/* receive list statistics */
 };
 
 #endif /* __NETNS_CAN_H__ */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 421b60fc42c3..b6406fe33c76 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -2,7 +2,7 @@
  * af_can.c - Protocol family CAN core module
  *            (used by different CAN protocol modules)
  *
- * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -81,10 +81,6 @@ static struct kmem_cache *rcv_cache __read_mostly;
 static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
 static DEFINE_MUTEX(proto_tab_lock);
 
-struct timer_list can_stattimer;   /* timer for statistics update */
-struct s_stats    can_stats;       /* packet statistics */
-struct s_pstats   can_pstats;      /* receive list statistics */
-
 static atomic_t skbcounter = ATOMIC_INIT(0);
 
 /*
@@ -221,6 +217,7 @@ int can_send(struct sk_buff *skb, int loop)
 {
 	struct sk_buff *newskb = NULL;
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+	struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats;
 	int err = -EINVAL;
 
 	if (skb->len == CAN_MTU) {
@@ -309,8 +306,8 @@ int can_send(struct sk_buff *skb, int loop)
 		netif_rx_ni(newskb);
 
 	/* update statistics */
-	can_stats.tx_frames++;
-	can_stats.tx_frames_delta++;
+	can_stats->tx_frames++;
+	can_stats->tx_frames_delta++;
 
 	return 0;
 
@@ -468,6 +465,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
 	struct receiver *r;
 	struct hlist_head *rl;
 	struct dev_rcv_lists *d;
+	struct s_pstats *can_pstats = net->can.can_pstats;
 	int err = 0;
 
 	/* insert new receiver  (dev,canid,mask) -> (func,data) */
@@ -499,9 +497,9 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
 		hlist_add_head_rcu(&r->list, rl);
 		d->entries++;
 
-		can_pstats.rcv_entries++;
-		if (can_pstats.rcv_entries_max < can_pstats.rcv_entries)
-			can_pstats.rcv_entries_max = can_pstats.rcv_entries;
+		can_pstats->rcv_entries++;
+		if (can_pstats->rcv_entries_max < can_pstats->rcv_entries)
+			can_pstats->rcv_entries_max = can_pstats->rcv_entries;
 	} else {
 		kmem_cache_free(rcv_cache, r);
 		err = -ENODEV;
@@ -543,6 +541,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 {
 	struct receiver *r = NULL;
 	struct hlist_head *rl;
+	struct s_pstats *can_pstats = net->can.can_pstats;
 	struct dev_rcv_lists *d;
 
 	if (dev && dev->type != ARPHRD_CAN)
@@ -589,8 +588,8 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 	hlist_del_rcu(&r->list);
 	d->entries--;
 
-	if (can_pstats.rcv_entries > 0)
-		can_pstats.rcv_entries--;
+	if (can_pstats->rcv_entries > 0)
+		can_pstats->rcv_entries--;
 
 	/* remove device structure requested by NETDEV_UNREGISTER */
 	if (d->remove_on_zero_entries && !d->entries) {
@@ -684,11 +683,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 static void can_receive(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dev_rcv_lists *d;
+	struct net *net = dev_net(dev);
+	struct s_stats *can_stats = net->can.can_stats;
 	int matches;
 
 	/* update statistics */
-	can_stats.rx_frames++;
-	can_stats.rx_frames_delta++;
+	can_stats->rx_frames++;
+	can_stats->rx_frames_delta++;
 
 	/* create non-zero unique skb identifier together with *skb */
 	while (!(can_skb_prv(skb)->skbcnt))
@@ -697,10 +698,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_lock();
 
 	/* deliver the packet to sockets listening on all devices */
-	matches = can_rcv_filter(dev_net(dev)->can.can_rx_alldev_list, skb);
+	matches = can_rcv_filter(net->can.can_rx_alldev_list, skb);
 
 	/* find receive list for this device */
-	d = find_dev_rcv_lists(dev_net(dev), dev);
+	d = find_dev_rcv_lists(net, dev);
 	if (d)
 		matches += can_rcv_filter(d, skb);
 
@@ -710,8 +711,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
 	consume_skb(skb);
 
 	if (matches > 0) {
-		can_stats.matches++;
-		can_stats.matches_delta++;
+		can_stats->matches++;
+		can_stats->matches_delta++;
 	}
 }
 
@@ -876,8 +877,20 @@ static int can_pernet_init(struct net *net)
 	net->can.can_rx_alldev_list =
 		kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
 
-	if (IS_ENABLED(CONFIG_PROC_FS))
+	net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
+	net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
+
+	if (IS_ENABLED(CONFIG_PROC_FS)) {
+		/* the statistics are updated every second (timer triggered) */
+		if (stats_timer) {
+			setup_timer(&net->can.can_stattimer, can_stat_update,
+				    (unsigned long)net);
+			mod_timer(&net->can.can_stattimer,
+				  round_jiffies(jiffies + HZ));
+		}
+		net->can.can_stats->jiffies_init = jiffies;
 		can_init_proc(net);
+	}
 
 	return 0;
 }
@@ -886,8 +899,11 @@ static void can_pernet_exit(struct net *net)
 {
 	struct net_device *dev;
 
-	if (IS_ENABLED(CONFIG_PROC_FS))
+	if (IS_ENABLED(CONFIG_PROC_FS)) {
 		can_remove_proc(net);
+		if (stats_timer)
+			del_timer_sync(&net->can.can_stattimer);
+	}
 
 	/* remove created dev_rcv_lists from still registered CAN devices */
 	rcu_read_lock();
@@ -903,6 +919,8 @@ static void can_pernet_exit(struct net *net)
 	rcu_read_unlock();
 
 	kfree(net->can.can_rx_alldev_list);
+	kfree(net->can.can_stats);
+	kfree(net->can.can_pstats);
 }
 
 /*
@@ -950,14 +968,6 @@ static __init int can_init(void)
 	if (!rcv_cache)
 		return -ENOMEM;
 
-	if (IS_ENABLED(CONFIG_PROC_FS)) {
-		if (stats_timer) {
-		/* the statistics are updated every second (timer triggered) */
-			setup_timer(&can_stattimer, can_stat_update, 0);
-			mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
-		}
-	}
-
 	register_pernet_subsys(&can_pernet_ops);
 
 	/* protocol register */
@@ -971,11 +981,6 @@ static __init int can_init(void)
 
 static __exit void can_exit(void)
 {
-	if (IS_ENABLED(CONFIG_PROC_FS)) {
-		if (stats_timer)
-			del_timer_sync(&can_stattimer);
-	}
-
 	/* protocol unregister */
 	dev_remove_pack(&canfd_packet);
 	dev_remove_pack(&can_packet);
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 84a35e97c5e0..d0ef45bb2a72 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -115,9 +115,4 @@ void can_init_proc(struct net *net);
 void can_remove_proc(struct net *net);
 void can_stat_update(unsigned long data);
 
-/* structures and variables from af_can.c needed in proc.c for reading */
-extern struct timer_list can_stattimer;    /* timer for statistics update */
-extern struct s_stats    can_stats;        /* packet statistics */
-extern struct s_pstats   can_pstats;       /* receive list statistics */
-
 #endif /* AF_CAN_H */
diff --git a/net/can/proc.c b/net/can/proc.c
index 9a8d54d57b22..83045f00c63c 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -75,21 +75,23 @@ static const char rx_list_name[][8] = {
  * af_can statistics stuff
  */
 
-static void can_init_stats(void)
+static void can_init_stats(struct net *net)
 {
+	struct s_stats *can_stats = net->can.can_stats;
+	struct s_pstats *can_pstats = net->can.can_pstats;
 	/*
 	 * This memset function is called from a timer context (when
 	 * can_stattimer is active which is the default) OR in a process
 	 * context (reading the proc_fs when can_stattimer is disabled).
 	 */
-	memset(&can_stats, 0, sizeof(can_stats));
-	can_stats.jiffies_init = jiffies;
+	memset(can_stats, 0, sizeof(struct s_stats));
+	can_stats->jiffies_init = jiffies;
 
-	can_pstats.stats_reset++;
+	can_pstats->stats_reset++;
 
 	if (user_reset) {
 		user_reset = 0;
-		can_pstats.user_reset++;
+		can_pstats->user_reset++;
 	}
 }
 
@@ -115,64 +117,66 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
 
 void can_stat_update(unsigned long data)
 {
+	struct net *net = (struct net *)data;
+	struct s_stats *can_stats = net->can.can_stats;
 	unsigned long j = jiffies; /* snapshot */
 
 	/* restart counting in timer context on user request */
 	if (user_reset)
-		can_init_stats();
+		can_init_stats(net);
 
 	/* restart counting on jiffies overflow */
-	if (j < can_stats.jiffies_init)
-		can_init_stats();
+	if (j < can_stats->jiffies_init)
+		can_init_stats(net);
 
 	/* prevent overflow in calc_rate() */
-	if (can_stats.rx_frames > (ULONG_MAX / HZ))
-		can_init_stats();
+	if (can_stats->rx_frames > (ULONG_MAX / HZ))
+		can_init_stats(net);
 
 	/* prevent overflow in calc_rate() */
-	if (can_stats.tx_frames > (ULONG_MAX / HZ))
-		can_init_stats();
+	if (can_stats->tx_frames > (ULONG_MAX / HZ))
+		can_init_stats(net);
 
 	/* matches overflow - very improbable */
-	if (can_stats.matches > (ULONG_MAX / 100))
-		can_init_stats();
+	if (can_stats->matches > (ULONG_MAX / 100))
+		can_init_stats(net);
 
 	/* calc total values */
-	if (can_stats.rx_frames)
-		can_stats.total_rx_match_ratio = (can_stats.matches * 100) /
-			can_stats.rx_frames;
+	if (can_stats->rx_frames)
+		can_stats->total_rx_match_ratio = (can_stats->matches * 100) /
+			can_stats->rx_frames;
 
-	can_stats.total_tx_rate = calc_rate(can_stats.jiffies_init, j,
-					    can_stats.tx_frames);
-	can_stats.total_rx_rate = calc_rate(can_stats.jiffies_init, j,
-					    can_stats.rx_frames);
+	can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j,
+					    can_stats->tx_frames);
+	can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j,
+					    can_stats->rx_frames);
 
 	/* calc current values */
-	if (can_stats.rx_frames_delta)
-		can_stats.current_rx_match_ratio =
-			(can_stats.matches_delta * 100) /
-			can_stats.rx_frames_delta;
+	if (can_stats->rx_frames_delta)
+		can_stats->current_rx_match_ratio =
+			(can_stats->matches_delta * 100) /
+			can_stats->rx_frames_delta;
 
-	can_stats.current_tx_rate = calc_rate(0, HZ, can_stats.tx_frames_delta);
-	can_stats.current_rx_rate = calc_rate(0, HZ, can_stats.rx_frames_delta);
+	can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta);
+	can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta);
 
 	/* check / update maximum values */
-	if (can_stats.max_tx_rate < can_stats.current_tx_rate)
-		can_stats.max_tx_rate = can_stats.current_tx_rate;
+	if (can_stats->max_tx_rate < can_stats->current_tx_rate)
+		can_stats->max_tx_rate = can_stats->current_tx_rate;
 
-	if (can_stats.max_rx_rate < can_stats.current_rx_rate)
-		can_stats.max_rx_rate = can_stats.current_rx_rate;
+	if (can_stats->max_rx_rate < can_stats->current_rx_rate)
+		can_stats->max_rx_rate = can_stats->current_rx_rate;
 
-	if (can_stats.max_rx_match_ratio < can_stats.current_rx_match_ratio)
-		can_stats.max_rx_match_ratio = can_stats.current_rx_match_ratio;
+	if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio)
+		can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio;
 
 	/* clear values for 'current rate' calculation */
-	can_stats.tx_frames_delta = 0;
-	can_stats.rx_frames_delta = 0;
-	can_stats.matches_delta   = 0;
+	can_stats->tx_frames_delta = 0;
+	can_stats->rx_frames_delta = 0;
+	can_stats->matches_delta   = 0;
 
 	/* restart timer (one second) */
-	mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
+	mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ));
 }
 
 /*
@@ -206,57 +210,61 @@ static void can_print_recv_banner(struct seq_file *m)
 
 static int can_stats_proc_show(struct seq_file *m, void *v)
 {
+	struct net *net = m->private;
+	struct s_stats *can_stats = net->can.can_stats;
+	struct s_pstats *can_pstats = net->can.can_pstats;
+
 	seq_putc(m, '\n');
-	seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats.tx_frames);
-	seq_printf(m, " %8ld received frames (RXF)\n", can_stats.rx_frames);
-	seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats.matches);
+	seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
+	seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames);
+	seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches);
 
 	seq_putc(m, '\n');
 
-	if (can_stattimer.function == can_stat_update) {
+	if (net->can.can_stattimer.function == can_stat_update) {
 		seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
-				can_stats.total_rx_match_ratio);
+				can_stats->total_rx_match_ratio);
 
 		seq_printf(m, " %8ld frames/s total tx rate (TXR)\n",
-				can_stats.total_tx_rate);
+				can_stats->total_tx_rate);
 		seq_printf(m, " %8ld frames/s total rx rate (RXR)\n",
-				can_stats.total_rx_rate);
+				can_stats->total_rx_rate);
 
 		seq_putc(m, '\n');
 
 		seq_printf(m, " %8ld %% current match ratio (CRXMR)\n",
-				can_stats.current_rx_match_ratio);
+				can_stats->current_rx_match_ratio);
 
 		seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n",
-				can_stats.current_tx_rate);
+				can_stats->current_tx_rate);
 		seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n",
-				can_stats.current_rx_rate);
+				can_stats->current_rx_rate);
 
 		seq_putc(m, '\n');
 
 		seq_printf(m, " %8ld %% max match ratio (MRXMR)\n",
-				can_stats.max_rx_match_ratio);
+				can_stats->max_rx_match_ratio);
 
 		seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n",
-				can_stats.max_tx_rate);
+				can_stats->max_tx_rate);
 		seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n",
-				can_stats.max_rx_rate);
+				can_stats->max_rx_rate);
 
 		seq_putc(m, '\n');
 	}
 
 	seq_printf(m, " %8ld current receive list entries (CRCV)\n",
-			can_pstats.rcv_entries);
+			can_pstats->rcv_entries);
 	seq_printf(m, " %8ld maximum receive list entries (MRCV)\n",
-			can_pstats.rcv_entries_max);
+			can_pstats->rcv_entries_max);
 
-	if (can_pstats.stats_reset)
+	if (can_pstats->stats_reset)
 		seq_printf(m, "\n %8ld statistic resets (STR)\n",
-				can_pstats.stats_reset);
+				can_pstats->stats_reset);
 
-	if (can_pstats.user_reset)
+	if (can_pstats->user_reset)
 		seq_printf(m, " %8ld user statistic resets (USTR)\n",
-				can_pstats.user_reset);
+				can_pstats->user_reset);
 
 	seq_putc(m, '\n');
 	return 0;
@@ -264,7 +272,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
 
 static int can_stats_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, can_stats_proc_show, NULL);
+	return single_open_net(inode, file, can_stats_proc_show);
 }
 
 static const struct file_operations can_stats_proc_fops = {
@@ -277,25 +285,28 @@ static const struct file_operations can_stats_proc_fops = {
 
 static int can_reset_stats_proc_show(struct seq_file *m, void *v)
 {
+	struct net *net = m->private;
+	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct s_stats *can_stats = net->can.can_stats;
+
 	user_reset = 1;
 
-	if (can_stattimer.function == can_stat_update) {
+	if (net->can.can_stattimer.function == can_stat_update) {
 		seq_printf(m, "Scheduled statistic reset #%ld.\n",
-				can_pstats.stats_reset + 1);
-
+				can_pstats->stats_reset + 1);
 	} else {
-		if (can_stats.jiffies_init != jiffies)
-			can_init_stats();
+		if (can_stats->jiffies_init != jiffies)
+			can_init_stats(net);
 
 		seq_printf(m, "Performed statistic reset #%ld.\n",
-				can_pstats.stats_reset);
+				can_pstats->stats_reset);
 	}
 	return 0;
 }
 
 static int can_reset_stats_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, can_reset_stats_proc_show, NULL);
+	return single_open_net(inode, file, can_reset_stats_proc_show);
 }
 
 static const struct file_operations can_reset_stats_proc_fops = {
@@ -314,7 +325,7 @@ static int can_version_proc_show(struct seq_file *m, void *v)
 
 static int can_version_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, can_version_proc_show, NULL);
+	return single_open_net(inode, file, can_version_proc_show);
 }
 
 static const struct file_operations can_version_proc_fops = {
-- 
2.11.0

^ permalink raw reply related

* [PATCH 5/8] can: network namespace support for CAN_BCM protocol
From: Oliver Hartkopp @ 2017-04-25  6:19 UTC (permalink / raw)
  To: linux-can, davem; +Cc: Oliver Hartkopp, mkl, dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>

The CAN_BCM protocol and its procfs entries were not implemented as per-net
in the initial network namespace support by Mario Kicherer (8e8cda6d737d).
This patch adds the missing per-net functionality for the CAN BCM.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 include/net/netns/can.h |  1 +
 net/can/bcm.c           | 90 +++++++++++++++++++++++++++++++------------------
 2 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index 574157dbc43a..0f3c31aab8a8 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -23,6 +23,7 @@ struct netns_can {
 	struct proc_dir_entry *pde_rcvlist_sff;
 	struct proc_dir_entry *pde_rcvlist_eff;
 	struct proc_dir_entry *pde_rcvlist_err;
+	struct proc_dir_entry *bcmproc_dir;
 #endif
 
 	/* receive filters subscribed for 'all' CAN devices */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 1976629a8463..0e855917b7e1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,7 +1,7 @@
 /*
  * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
  *
- * Copyright (c) 2002-2016 Volkswagen Group Electronic Research
+ * Copyright (c) 2002-2017 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -77,7 +77,7 @@
 		     (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
 		     (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
 
-#define CAN_BCM_VERSION "20161123"
+#define CAN_BCM_VERSION "20170425"
 
 MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -118,8 +118,6 @@ struct bcm_op {
 	struct net_device *rx_reg_dev;
 };
 
-static struct proc_dir_entry *proc_dir;
-
 struct bcm_sock {
 	struct sock sk;
 	int bound;
@@ -149,7 +147,7 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv)
 /*
  * procfs functions
  */
-static char *bcm_proc_getifname(char *result, int ifindex)
+static char *bcm_proc_getifname(struct net *net, char *result, int ifindex)
 {
 	struct net_device *dev;
 
@@ -157,7 +155,7 @@ static char *bcm_proc_getifname(char *result, int ifindex)
 		return "any";
 
 	rcu_read_lock();
-	dev = dev_get_by_index_rcu(&init_net, ifindex);
+	dev = dev_get_by_index_rcu(net, ifindex);
 	if (dev)
 		strcpy(result, dev->name);
 	else
@@ -170,7 +168,8 @@ static char *bcm_proc_getifname(char *result, int ifindex)
 static int bcm_proc_show(struct seq_file *m, void *v)
 {
 	char ifname[IFNAMSIZ];
-	struct sock *sk = (struct sock *)m->private;
+	struct net *net = m->private;
+	struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode);
 	struct bcm_sock *bo = bcm_sk(sk);
 	struct bcm_op *op;
 
@@ -178,7 +177,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, " / sk %pK", sk);
 	seq_printf(m, " / bo %pK", bo);
 	seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs);
-	seq_printf(m, " / bound %s", bcm_proc_getifname(ifname, bo->ifindex));
+	seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex));
 	seq_printf(m, " <<<\n");
 
 	list_for_each_entry(op, &bo->rx_ops, list) {
@@ -190,7 +189,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
 			continue;
 
 		seq_printf(m, "rx_op: %03X %-5s ", op->can_id,
-			   bcm_proc_getifname(ifname, op->ifindex));
+			   bcm_proc_getifname(net, ifname, op->ifindex));
 
 		if (op->flags & CAN_FD_FRAME)
 			seq_printf(m, "(%u)", op->nframes);
@@ -219,7 +218,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
 	list_for_each_entry(op, &bo->tx_ops, list) {
 
 		seq_printf(m, "tx_op: %03X %s ", op->can_id,
-			   bcm_proc_getifname(ifname, op->ifindex));
+			   bcm_proc_getifname(net, ifname, op->ifindex));
 
 		if (op->flags & CAN_FD_FRAME)
 			seq_printf(m, "(%u) ", op->nframes);
@@ -242,7 +241,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
 
 static int bcm_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, bcm_proc_show, PDE_DATA(inode));
+	return single_open_net(inode, file, bcm_proc_show);
 }
 
 static const struct file_operations bcm_proc_fops = {
@@ -267,7 +266,7 @@ static void bcm_can_tx(struct bcm_op *op)
 	if (!op->ifindex)
 		return;
 
-	dev = dev_get_by_index(&init_net, op->ifindex);
+	dev = dev_get_by_index(sock_net(op->sk), op->ifindex);
 	if (!dev) {
 		/* RFC: should this bcm_op remove itself here? */
 		return;
@@ -764,7 +763,7 @@ static void bcm_remove_op(struct bcm_op *op)
 static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
 {
 	if (op->rx_reg_dev == dev) {
-		can_rx_unregister(&init_net, dev, op->can_id,
+		can_rx_unregister(dev_net(dev), dev, op->can_id,
 				  REGMASK(op->can_id), bcm_rx_handler, op);
 
 		/* mark as removed subscription */
@@ -800,7 +799,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
 				if (op->rx_reg_dev) {
 					struct net_device *dev;
 
-					dev = dev_get_by_index(&init_net,
+					dev = dev_get_by_index(sock_net(op->sk),
 							       op->ifindex);
 					if (dev) {
 						bcm_rx_unreg(dev, op);
@@ -808,7 +807,8 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
 					}
 				}
 			} else
-				can_rx_unregister(&init_net, NULL, op->can_id,
+				can_rx_unregister(sock_net(op->sk), NULL,
+						  op->can_id,
 						  REGMASK(op->can_id),
 						  bcm_rx_handler, op);
 
@@ -1220,9 +1220,9 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		if (ifindex) {
 			struct net_device *dev;
 
-			dev = dev_get_by_index(&init_net, ifindex);
+			dev = dev_get_by_index(sock_net(sk), ifindex);
 			if (dev) {
-				err = can_rx_register(&init_net, dev,
+				err = can_rx_register(sock_net(sk), dev,
 						      op->can_id,
 						      REGMASK(op->can_id),
 						      bcm_rx_handler, op,
@@ -1233,7 +1233,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 			}
 
 		} else
-			err = can_rx_register(&init_net, NULL, op->can_id,
+			err = can_rx_register(sock_net(sk), NULL, op->can_id,
 					      REGMASK(op->can_id),
 					      bcm_rx_handler, op, "bcm", sk);
 		if (err) {
@@ -1273,7 +1273,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk,
 		return err;
 	}
 
-	dev = dev_get_by_index(&init_net, ifindex);
+	dev = dev_get_by_index(sock_net(sk), ifindex);
 	if (!dev) {
 		kfree_skb(skb);
 		return -ENODEV;
@@ -1338,7 +1338,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 		if (ifindex) {
 			struct net_device *dev;
 
-			dev = dev_get_by_index(&init_net, ifindex);
+			dev = dev_get_by_index(sock_net(sk), ifindex);
 			if (!dev)
 				return -ENODEV;
 
@@ -1419,7 +1419,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
 	struct bcm_op *op;
 	int notify_enodev = 0;
 
-	if (!net_eq(dev_net(dev), &init_net))
+	if (!net_eq(dev_net(dev), sock_net(sk)))
 		return NOTIFY_DONE;
 
 	if (dev->type != ARPHRD_CAN)
@@ -1491,6 +1491,7 @@ static int bcm_init(struct sock *sk)
 static int bcm_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
 	struct bcm_sock *bo;
 	struct bcm_op *op, *next;
 
@@ -1522,14 +1523,14 @@ static int bcm_release(struct socket *sock)
 			if (op->rx_reg_dev) {
 				struct net_device *dev;
 
-				dev = dev_get_by_index(&init_net, op->ifindex);
+				dev = dev_get_by_index(net, op->ifindex);
 				if (dev) {
 					bcm_rx_unreg(dev, op);
 					dev_put(dev);
 				}
 			}
 		} else
-			can_rx_unregister(&init_net, NULL, op->can_id,
+			can_rx_unregister(net, NULL, op->can_id,
 					  REGMASK(op->can_id),
 					  bcm_rx_handler, op);
 
@@ -1537,8 +1538,8 @@ static int bcm_release(struct socket *sock)
 	}
 
 	/* remove procfs entry */
-	if (proc_dir && bo->bcm_proc_read)
-		remove_proc_entry(bo->procname, proc_dir);
+	if (net->can.bcmproc_dir && bo->bcm_proc_read)
+		remove_proc_entry(bo->procname, net->can.bcmproc_dir);
 
 	/* remove device reference */
 	if (bo->bound) {
@@ -1561,6 +1562,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
 	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
 	struct sock *sk = sock->sk;
 	struct bcm_sock *bo = bcm_sk(sk);
+	struct net *net = sock_net(sk);
 	int ret = 0;
 
 	if (len < sizeof(*addr))
@@ -1577,7 +1579,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
 	if (addr->can_ifindex) {
 		struct net_device *dev;
 
-		dev = dev_get_by_index(&init_net, addr->can_ifindex);
+		dev = dev_get_by_index(net, addr->can_ifindex);
 		if (!dev) {
 			ret = -ENODEV;
 			goto fail;
@@ -1596,11 +1598,11 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
 		bo->ifindex = 0;
 	}
 
-	if (proc_dir) {
+	if (net->can.bcmproc_dir) {
 		/* unique socket address as filename */
 		sprintf(bo->procname, "%lu", sock_i_ino(sk));
 		bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
-						     proc_dir,
+						     net->can.bcmproc_dir,
 						     &bcm_proc_fops, sk);
 		if (!bo->bcm_proc_read) {
 			ret = -ENOMEM;
@@ -1687,6 +1689,31 @@ static const struct can_proto bcm_can_proto = {
 	.prot       = &bcm_proto,
 };
 
+static int canbcm_pernet_init(struct net *net)
+{
+	/* create /proc/net/can-bcm directory */
+	if (IS_ENABLED(CONFIG_PROC_FS)) {
+		net->can.bcmproc_dir =
+			proc_net_mkdir(net, "can-bcm", net->proc_net);
+	}
+
+	return 0;
+}
+
+static void canbcm_pernet_exit(struct net *net)
+{
+	/* remove /proc/net/can-bcm directory */
+	if (IS_ENABLED(CONFIG_PROC_FS)) {
+		if (net->can.bcmproc_dir)
+			remove_proc_entry("can-bcm", net->proc_net);
+	}
+}
+
+static struct pernet_operations canbcm_pernet_ops __read_mostly = {
+	.init = canbcm_pernet_init,
+	.exit = canbcm_pernet_exit,
+};
+
 static int __init bcm_module_init(void)
 {
 	int err;
@@ -1699,17 +1726,14 @@ static int __init bcm_module_init(void)
 		return err;
 	}
 
-	/* create /proc/net/can-bcm directory */
-	proc_dir = proc_mkdir("can-bcm", init_net.proc_net);
+	register_pernet_subsys(&canbcm_pernet_ops);
 	return 0;
 }
 
 static void __exit bcm_module_exit(void)
 {
 	can_proto_unregister(&bcm_can_proto);
-
-	if (proc_dir)
-		remove_proc_entry("can-bcm", init_net.proc_net);
+	unregister_pernet_subsys(&canbcm_pernet_ops);
 }
 
 module_init(bcm_module_init);
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next] can: ti_hecc: fix return value check in ti_hecc_probe()
From: Wei Yongjun @ 2017-04-25  6:44 UTC (permalink / raw)
  To: Wolfgang Grandegger, Marc Kleine-Budde, Anton Glukhov,
	Yegor Yefremov
  Cc: Wei Yongjun, linux-can, netdev

From: Wei Yongjun <weiyongjun1@huawei.com>

In case of error, the function devm_ioremap_resource() returns ERR_PTR()
and never returns NULL. The NULL test in the return value check should
be replaced with IS_ERR().

Fixes: dabf54dd1c63 ("can: ti_hecc: Convert TI HECC driver to DT only driver")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
---
 drivers/net/can/ti_hecc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index b8aac53..4d49414 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -898,9 +898,9 @@ static int ti_hecc_probe(struct platform_device *pdev)
 	}
 
 	priv->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!priv->base) {
+	if (IS_ERR(priv->base)) {
 		dev_err(&pdev->dev, "hecc ioremap failed\n");
-		return -ENOMEM;
+		return PTR_ERR(priv->base);
 	}
 
 	/* handle hecc-ram memory */
@@ -911,9 +911,9 @@ static int ti_hecc_probe(struct platform_device *pdev)
 	}
 
 	priv->hecc_ram = devm_ioremap_resource(&pdev->dev, res);
-	if (!priv->hecc_ram) {
+	if (IS_ERR(priv->hecc_ram)) {
 		dev_err(&pdev->dev, "hecc-ram ioremap failed\n");
-		return -ENOMEM;
+		return PTR_ERR(priv->hecc_ram);
 	}
 
 	/* handle mbx memory */
@@ -924,9 +924,9 @@ static int ti_hecc_probe(struct platform_device *pdev)
 	}
 
 	priv->mbx = devm_ioremap_resource(&pdev->dev, res);
-	if (!priv->mbx) {
+	if (IS_ERR(priv->mbx)) {
 		dev_err(&pdev->dev, "mbx ioremap failed\n");
-		return -ENOMEM;
+		return PTR_ERR(priv->mbx);
 	}
 
 	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);


^ permalink raw reply related

* Re: [PATCH 0/8] Fix and complete CAN namespace support
From: Marc Kleine-Budde @ 2017-04-25  6:48 UTC (permalink / raw)
  To: Oliver Hartkopp, linux-can, davem; +Cc: dev, netdev
In-Reply-To: <20170425061945.28722-1-socketcan@hartkopp.net>


[-- Attachment #1.1: Type: text/plain, Size: 870 bytes --]

On 04/25/2017 08:19 AM, Oliver Hartkopp wrote:
> Hello Dave,
> 
> unfortunately the initial network namespace support by Mario Kicherer
> (8e8cda6d737d) slipped into net-next without further review and Marc pushed
> the code without my Acked-by. Due to the fact that this code was in net-next
> now I spent some nights to fix, clean up, finalize and test the missing pieces
> for the namespace support for the CAN subsystem in net/can.
> 
> As Marc is currently *VERY* unresponsive on the mailing list due to his 'real'

... and holidays :) But I'm back in the office now.

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox