Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 14/15] s390/qeth: pass full data length to l3_fill_header()
From: Julian Wiedmann @ 2017-12-20 19:11 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20171220191109.90487-1-jwi@linux.vnet.ibm.com>

The TSO and IQD paths already need to fix-up the current values, and
OSA will require more flexibility in the future as well. So just let
the caller specify the data length.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
---
 drivers/s390/net/qeth_l3_main.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 39b41e24318b..e31cc4fa544c 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2405,11 +2405,12 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type)
 }
 
 static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
-				struct sk_buff *skb, int ipv, int cast_type)
+				struct sk_buff *skb, int ipv, int cast_type,
+				unsigned int data_len)
 {
 	memset(hdr, 0, sizeof(struct qeth_hdr));
 	hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
-	hdr->hdr.l3.length = skb->len - sizeof(struct qeth_hdr);
+	hdr->hdr.l3.length = data_len;
 
 	/*
 	 * before we're going to overwrite this location with next hop ip.
@@ -2488,7 +2489,6 @@ static void qeth_tso_fill_header(struct qeth_card *card,
 
 	/*fix header to TSO values ...*/
 	hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO;
-	hdr->hdr.hdr.l3.length = skb->len - sizeof(struct qeth_hdr_tso);
 	/*set values which are fix for the first approach ...*/
 	hdr->ext.hdr_tot_len = (__u16) sizeof(struct qeth_hdr_ext_tso);
 	hdr->ext.imb_hdr_no  = 1;
@@ -2649,21 +2649,23 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
 	if (use_tso) {
 		hdr = skb_push(new_skb, sizeof(struct qeth_hdr_tso));
 		memset(hdr, 0, sizeof(struct qeth_hdr_tso));
-		qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type);
+		qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type,
+				    new_skb->len - sizeof(struct qeth_hdr_tso));
 		qeth_tso_fill_header(card, hdr, new_skb);
 		hdr_elements++;
 	} else {
 		if (data_offset < 0) {
 			hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
-			qeth_l3_fill_header(card, hdr, new_skb, ipv,
-						cast_type);
+			qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type,
+					    new_skb->len -
+					    sizeof(struct qeth_hdr));
 		} else {
 			if (be16_to_cpu(new_skb->protocol) == ETH_P_AF_IUCV)
 				qeth_l3_fill_af_iucv_hdr(card, hdr, new_skb);
 			else {
 				qeth_l3_fill_header(card, hdr, new_skb, ipv,
-							cast_type);
-				hdr->hdr.l3.length = new_skb->len - data_offset;
+						    cast_type,
+						    new_skb->len - data_offset);
 			}
 		}
 
-- 
2.13.5

^ permalink raw reply related

* [PATCH net-next 15/15] s390/qeth: replace open-coded in*_pton()
From: Julian Wiedmann @ 2017-12-20 19:11 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20171220191109.90487-1-jwi@linux.vnet.ibm.com>

There's a common helper for parsing an IP address string, let's use it.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
---
 drivers/s390/net/qeth_l3.h      |  1 -
 drivers/s390/net/qeth_l3_main.c | 88 -----------------------------------------
 drivers/s390/net/qeth_l3_sys.c  | 12 ++++++
 3 files changed, 12 insertions(+), 89 deletions(-)

diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 071c00841208..49f92ebbc5ad 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -69,7 +69,6 @@ struct qeth_ipato_entry {
 extern const struct attribute_group *qeth_l3_attr_groups[];
 
 void qeth_l3_ipaddr_to_string(enum qeth_prot_versions, const __u8 *, char *);
-int qeth_l3_string_to_ipaddr(const char *, enum qeth_prot_versions, __u8 *);
 int qeth_l3_create_device_attributes(struct device *);
 void qeth_l3_remove_device_attributes(struct device *);
 int qeth_l3_setrouting_v4(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e31cc4fa544c..92bcb02671bc 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -48,93 +48,16 @@ static int qeth_l3_register_addr_entry(struct qeth_card *,
 static int qeth_l3_deregister_addr_entry(struct qeth_card *,
 		struct qeth_ipaddr *);
 
-static int qeth_l3_isxdigit(char *buf)
-{
-	while (*buf) {
-		if (!isxdigit(*buf++))
-			return 0;
-	}
-	return 1;
-}
-
 static void qeth_l3_ipaddr4_to_string(const __u8 *addr, char *buf)
 {
 	sprintf(buf, "%pI4", addr);
 }
 
-static int qeth_l3_string_to_ipaddr4(const char *buf, __u8 *addr)
-{
-	int count = 0, rc = 0;
-	unsigned int in[4];
-	char c;
-
-	rc = sscanf(buf, "%u.%u.%u.%u%c",
-		    &in[0], &in[1], &in[2], &in[3], &c);
-	if (rc != 4 && (rc != 5 || c != '\n'))
-		return -EINVAL;
-	for (count = 0; count < 4; count++) {
-		if (in[count] > 255)
-			return -EINVAL;
-		addr[count] = in[count];
-	}
-	return 0;
-}
-
 static void qeth_l3_ipaddr6_to_string(const __u8 *addr, char *buf)
 {
 	sprintf(buf, "%pI6", addr);
 }
 
-static int qeth_l3_string_to_ipaddr6(const char *buf, __u8 *addr)
-{
-	const char *end, *end_tmp, *start;
-	__u16 *in;
-	char num[5];
-	int num2, cnt, out, found, save_cnt;
-	unsigned short in_tmp[8] = {0, };
-
-	cnt = out = found = save_cnt = num2 = 0;
-	end = start = buf;
-	in = (__u16 *) addr;
-	memset(in, 0, 16);
-	while (*end) {
-		end = strchr(start, ':');
-		if (end == NULL) {
-			end = buf + strlen(buf);
-			end_tmp = strchr(start, '\n');
-			if (end_tmp != NULL)
-				end = end_tmp;
-			out = 1;
-		}
-		if ((end - start)) {
-			memset(num, 0, 5);
-			if ((end - start) > 4)
-				return -EINVAL;
-			memcpy(num, start, end - start);
-			if (!qeth_l3_isxdigit(num))
-				return -EINVAL;
-			sscanf(start, "%x", &num2);
-			if (found)
-				in_tmp[save_cnt++] = num2;
-			else
-				in[cnt++] = num2;
-			if (out)
-				break;
-		} else {
-			if (found)
-				return -EINVAL;
-			found = 1;
-		}
-		start = ++end;
-	}
-	if (cnt + save_cnt > 8)
-		return -EINVAL;
-	cnt = 7;
-	while (save_cnt)
-		in[cnt--] = in_tmp[--save_cnt];
-	return 0;
-}
-
 void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
 				char *buf)
 {
@@ -144,17 +67,6 @@ void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
 		qeth_l3_ipaddr6_to_string(addr, buf);
 }
 
-int qeth_l3_string_to_ipaddr(const char *buf, enum qeth_prot_versions proto,
-				__u8 *addr)
-{
-	if (proto == QETH_PROT_IPV4)
-		return qeth_l3_string_to_ipaddr4(buf, addr);
-	else if (proto == QETH_PROT_IPV6)
-		return qeth_l3_string_to_ipaddr6(buf, addr);
-	else
-		return -EINVAL;
-}
-
 static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
 {
 	int i, j;
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index 6ea2b528a64e..00a10b66c01f 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -10,11 +10,23 @@
 #include <linux/slab.h>
 #include <asm/ebcdic.h>
 #include <linux/hashtable.h>
+#include <linux/inet.h>
 #include "qeth_l3.h"
 
 #define QETH_DEVICE_ATTR(_id, _name, _mode, _show, _store) \
 struct device_attribute dev_attr_##_id = __ATTR(_name, _mode, _show, _store)
 
+static int qeth_l3_string_to_ipaddr(const char *buf,
+				    enum qeth_prot_versions proto, u8 *addr)
+{
+	const char *end;
+
+	if ((proto == QETH_PROT_IPV4 && !in4_pton(buf, -1, addr, -1, &end)) ||
+	    (proto == QETH_PROT_IPV6 && !in6_pton(buf, -1, addr, -1, &end)))
+		return -EINVAL;
+	return 0;
+}
+
 static ssize_t qeth_l3_dev_route_show(struct qeth_card *card,
 			struct qeth_routing_info *route, char *buf)
 {
-- 
2.13.5

^ permalink raw reply related

* Re: [PATCH v3 net-next 0/6] tls: Add generic NIC offload infrastructure
From: Jiri Pirko @ 2017-12-20 19:12 UTC (permalink / raw)
  To: David Miller
  Cc: ilyal, borisp, netdev, davejwatson, tom, hannes, aviadye, liranl
In-Reply-To: <20171220.113614.1447120146427365359.davem@davemloft.net>

Wed, Dec 20, 2017 at 05:36:14PM CET, davem@davemloft.net wrote:
>From: Ilya Lesokhin <ilyal@mellanox.com>
>Date: Wed, 20 Dec 2017 16:23:03 +0000
>
>>> Whether you provide the API addition patches and the user in the same patch
>>> series, or a separate one, doesn't really matter.  What is important is that this
>>> is accessible to the reviewer at the same time.
>> 
>> Note that we did provide a user in an accessible place.
>
>That is not accessible for people reading netdev, it needs to be posted
>on the netdev list.
>
>It is never appropriate to require a reviewer to look at some external
>site to review a patch series posted here.

Yeah, that is why I think it is really best to have the infra and the
driver that uses it in a single patchset. Easier to understand what is
going on :)

^ permalink raw reply

* Re: Linux ECN Handling
From: Steve Ibanez @ 2017-12-20 19:20 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: Eric Dumazet, Yuchung Cheng, Daniel Borkmann, Netdev,
	Florian Westphal, Mohammad Alizadeh, Lawrence Brakmo
In-Reply-To: <CADVnQynQq6MWLXTaugV-rJq1MBmCrmL4bWNupWC+-uZw0Dz84w@mail.gmail.com>

Hi Neal,

I added in some more printk statements and it does indeed look like
all of these calls you listed are being invoked successfully. I guess
this isn't too surprising given what the inet_csk_schedule_ack() and
inet_csk_ack_scheduled() functions are doing:

static inline void inet_csk_schedule_ack(struct sock *sk)
{
        inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
}

static inline int inet_csk_ack_scheduled(const struct sock *sk)
{
        return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
}

So through the code path that you listed, the inet_csk_schedule_ack()
function sets the ICSK_ACK_SCHED bit and then the tcp_ack_snd_check()
function just checks that the ICSK_ACK_SCHED bit is indeed set.

Do you know how I can verify that setting the ICSK_ACK_SCHED bit
actually results in an ACK being sent?

Thanks,
-Steve

On Tue, Dec 19, 2017 at 4:08 PM, Neal Cardwell <ncardwell@google.com> wrote:
> On Tue, Dec 19, 2017 at 5:00 PM, Steve Ibanez <sibanez@stanford.edu> wrote:
>> Hi Neal,
>>
>> I managed to track down the code path that the unACKed CWR packet is
>> taking. The tcp_rcv_established() function calls tcp_ack_snd_check()
>> at the end of step5 and then the return statement indicated below is
>> invoked, which prevents the __tcp_ack_snd_check() function from
>> running.
>>
>> static inline void tcp_ack_snd_check(struct sock *sk)
>> {
>>         if (!inet_csk_ack_scheduled(sk)) {
>>                 /* We sent a data segment already. */
>>                 return;   /* <=== here */
>>         }
>>         __tcp_ack_snd_check(sk, 1);
>> }
>>
>> So somehow tcp_ack_snd_check() thinks that a data segment was already
>> sent when in fact it wasn't. Do you see a way around this issue?
>
> Thanks for tracking that down! AFAICT in this case the call chain we
> are trying to achieve is as follows:
>
> tcp_rcv_established()
>  -> tcp_data_queue()
>  -> tcp_event_data_recv()
>  -> inet_csk_schedule_ack()
>
> The only think I can think of would be to add printks that fire for
> CWR packets, to isolate why the code bails out before it reaches those
> calls...
>
> thanks,
> neal

^ permalink raw reply

* Re: [PATCH net-next v4 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events
From: David Miller @ 2017-12-20 19:20 UTC (permalink / raw)
  To: mhiramat
  Cc: mingo, ian.mcdonald, vyasevich, stephen, rostedt, peterz, tglx,
	linux-kernel, hpa, gerrit, nhorman, dccp, netdev, linux-sctp, sfr
In-Reply-To: <151374325126.2497.6934744693865165386.stgit@devbox>

From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 20 Dec 2017 13:14:11 +0900

> This series is v4 of the replacement of jprobe usage with trace
> events. This version is rebased on net-next, fixes a build warning
> and moves a temporal variable definition in a block.
> 
> Previous version is here;
> https://lkml.org/lkml/2017/12/19/153
> 
> Changes from v3:
>   All: Rebased on net-next
>   [3/6]: fixes a build warning for i386 by casting pointer unsigned
>         long instead of __u64, and moves a temporal variable
>          definition in a block.

Looks good, series applied to net-next, thanks.

^ permalink raw reply

* Re: [for-next 07/11] net/mlx5: E-Switch, Create generic header struct to be used by representors
From: Mark Bloch @ 2017-12-20 19:22 UTC (permalink / raw)
  To: David Miller, saeedm-VPRAkNaXOzVWk0Htik3J/w
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, leonro-VPRAkNaXOzVWk0Htik3J/w
In-Reply-To: <20171220.125858.1378708055295289661.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>



On 20/12/2017 19:58, David Miller wrote:
> From: Saeed Mahameed <saeedm-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Date: Tue, 19 Dec 2017 12:33:36 -0800
> 
>> +static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
>> +{
>> +	u8 rep_type = 0;
>> +	int err;
>> +
>> +	while (rep_type < NUM_REP_TYPES) {
>> +		err = esw_offloads_load_reps_type(esw, nvports,
>> +						  rep_type);
>> +		if (err)
>> +			goto err_reps;
>> +		rep_type++;
>> +	}
> 
> Please, don't obfuscate what is a normal for() loop:
> 
> 	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type
In case of an error I'm using a while loop, wanted to use a
while loop on the good path as well, will change to a for loop.

> 
> Thanks.

> 

Thanks!
Mark
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [for-next 01/11] net/mlx5: E-Switch, Refactor vport representors initialization
From: Mark Bloch @ 2017-12-20 19:21 UTC (permalink / raw)
  To: David Miller, saeedm; +Cc: dledford, netdev, linux-rdma, leonro
In-Reply-To: <20171220.125734.63810464667583004.davem@davemloft.net>



On 20/12/2017 19:57, David Miller wrote:
> From: Saeed Mahameed <saeedm@mellanox.com>
> Date: Tue, 19 Dec 2017 12:33:30 -0800
> 
>> +int esw_offloads_init_reps(struct mlx5_eswitch *esw)
>> +{
>> +	struct mlx5_core_dev *dev = esw->dev;
>> +	struct mlx5_esw_offload *offloads;
>> +	struct mlx5_eswitch_rep *rep;
>> +	int total_vfs = MLX5_TOTAL_VPORTS(dev);
>> +	u8 hw_id[ETH_ALEN];
>> +	int vport;
> 
> Reverse christmas-tree please.
we need dev for  MLX5_TOTAL_VPORTS, so we can't have proper reverse christms-tree,
I'll just remove the initialization of total_vfs and do it after the declarations,
and switch between total_vfs and hw_id. 

> 
>> +	esw->offloads.vport_reps =
>> +		kcalloc(total_vfs, sizeof(struct mlx5_eswitch_rep),
>> +			GFP_KERNEL);
> 
> That looks really unpleasant:
> 
> 	x = kcalloc(y,
> 		    z, GFP_KERNEL);
> 
> would look so much nicer.
> 
No problem.

Thanks,
Mark.

^ permalink raw reply

* Re: [patch net-next 00/10] Add support for resource abstraction
From: David Ahern @ 2017-12-20 19:23 UTC (permalink / raw)
  To: Jiri Pirko, netdev
  Cc: davem, arkadis, mlxsw, andrew, vivien.didelot, f.fainelli,
	michael.chan, ganeshgr, saeedm, matanb, leonro, idosch,
	jakub.kicinski, ast, daniel, simon.horman, pieter.jansenvanvuuren,
	john.hurley, alexander.h.duyck, linville, gospo, steven.lin1,
	yuvalm, ogerlitz, roopa
In-Reply-To: <20171220115821.22171-1-jiri@resnulli.us>

On 12/20/17 4:58 AM, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> Many of the ASIC's internal resources are limited and are shared between
> several hardware procedures. For example, unified hash-based memory can
> be used for many lookup purposes, like FDB and LPM. In many cases the user
> can provide a partitioning scheme for such a resource in order to perform
> fine tuning for his application. In such cases performing driver reload is
> needed for the changes to take place, thus this patchset also adds support
> for hot reload.
> 
> Such an abstraction can be coupled with devlink's dpipe interface, which
> models the ASIC's pipeline as a graph of match/action tables. By modeling
> the hardware resource object, and by coupling it to several dpipe tables,
> further visibility can be achieved in order to debug ASIC-wide issues.
> 
> The proposed interface will provide the user the ability to understand the
> limitations of the hardware, and receive notification regarding its occupancy.
> Furthermore, monitoring the resource occupancy can be done in real-time and
> can be useful in many cases.
> ---
> Userspace part prototype can be found at https://github.com/arkadis/iproute2/
> at resource_dev branch.
> 

The breakout problem I mentioned in the last round of patches has not
been fixed:

$ ./devlink port split swp1 count 4
<hangs forever>

$ ps -C devlink
  PID TTY          TIME CMD
 4530 pts/0    00:00:00 devlink

$ cat /proc/4530/stack
[<0>] devlink_port_unregister+0x29/0x60 [devlink]
[<0>] mlxsw_core_port_fini+0x25/0x50 [mlxsw_core]
[<0>] mlxsw_sp_port_remove+0xf0/0x100 [mlxsw_spectrum]
[<0>] mlxsw_sp_port_split+0xdc/0x240 [mlxsw_spectrum]
[<0>] mlxsw_devlink_port_split+0x36/0x50 [mlxsw_core]
[<0>] devlink_nl_cmd_port_split_doit+0x3d/0x50 [devlink]
[<0>] genl_family_rcv_msg+0x1c4/0x380
[<0>] genl_rcv_msg+0x4c/0x90
[<0>] netlink_rcv_skb+0xe7/0x120
[<0>] genl_rcv+0x28/0x40
[<0>] netlink_unicast+0x181/0x230
[<0>] netlink_sendmsg+0x273/0x350
[<0>] SYSC_sendto+0x10e/0x1a0
[<0>] SyS_sendto+0xe/0x10
[<0>] do_syscall_64+0x5c/0x120
[<0>] entry_SYSCALL64_slow_path+0x25/0x25
[<0>] 0xffffffffffffffff


I rebuilt the tool from remotes/arkadi/resource_dev and I am not seeing
anything from the devlink command:

$ ./devlink resource show pci/0000:03:00.0
devlink answers: Success

Checking the kernel:

$ grep mlxsw_sp_resource_ /proc/kallsyms
ffffffffa0145080 t mlxsw_sp_resource_kvd_size_validate	[mlxsw_spectrum]
ffffffffa01450a0 t mlxsw_sp_resource_kvd_linear_size_validate
[mlxsw_spectrum]
ffffffffa01456a0 t mlxsw_sp_resource_kvd_hash_double_size_validate
[mlxsw_spectrum]
ffffffffa0145d40 t mlxsw_sp_resource_kvd_linear_occ_get	[mlxsw_spectrum]
ffffffffa0145d70 t mlxsw_sp_resource_kvd_hash_single_size_validate
[mlxsw_spectrum]

^ permalink raw reply

* Re: [PATCH net-next v4 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events
From: David Miller @ 2017-12-20 19:24 UTC (permalink / raw)
  To: mhiramat
  Cc: mingo, ian.mcdonald, vyasevich, stephen, rostedt, peterz, tglx,
	linux-kernel, hpa, gerrit, nhorman, dccp, netdev, linux-sctp, sfr
In-Reply-To: <20171220.142040.593760128783699712.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Wed, 20 Dec 2017 14:20:40 -0500 (EST)

> From: Masami Hiramatsu <mhiramat@kernel.org>
> Date: Wed, 20 Dec 2017 13:14:11 +0900
> 
>> This series is v4 of the replacement of jprobe usage with trace
>> events. This version is rebased on net-next, fixes a build warning
>> and moves a temporal variable definition in a block.
>> 
>> Previous version is here;
>> https://lkml.org/lkml/2017/12/19/153
>> 
>> Changes from v3:
>>   All: Rebased on net-next
>>   [3/6]: fixes a build warning for i386 by casting pointer unsigned
>>         long instead of __u64, and moves a temporal variable
>>          definition in a block.
> 
> Looks good, series applied to net-next, thanks.

Actually, this doesn't even compile, so I've reverted:

[davem@dhcp-10-15-49-227 net-next]$ make -s -j16
In file included from net/dccp/trace.h:105:0,
                 from net/dccp/proto.c:42:
./include/trace/define_trace.h:89:42: fatal error: ./trace.h: No such file or directory
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
                                          ^
compilation terminated.

^ permalink raw reply

* Re: [PATCH] selftests: net: Adding config fragment CONFIG_NUMA=y
From: David Miller @ 2017-12-20 19:25 UTC (permalink / raw)
  To: naresh.kamboju; +Cc: netdev, guro, bamvor.zhangjian, shuahkh
In-Reply-To: <1513754422-3444-1-git-send-email-naresh.kamboju@linaro.org>

From: Naresh Kamboju <naresh.kamboju@linaro.org>
Date: Wed, 20 Dec 2017 12:50:22 +0530

> kernel config fragement CONFIG_NUMA=y is need for reuseport_bpf_numa.
> 
> Signed-off-by: Naresh Kamboju <naresh.kamboju@linaro.org>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH V3 net-next 00/17] add some features and fix some bugs for HNS3 driver
From: David Miller @ 2017-12-20 19:28 UTC (permalink / raw)
  To: lipeng321; +Cc: netdev, linux-kernel, linuxarm, salil.mehta
In-Reply-To: <1513759399-40915-1-git-send-email-lipeng321@huawei.com>

From: Lipeng <lipeng321@huawei.com>
Date: Wed, 20 Dec 2017 16:43:02 +0800

> This patchset adds some new feature support and fixes some bugs:
> [Patch 1/17 - 5/17] add the support to modify/query the tqp number
> through ethtool -L/l command, and also fix some related bugs for
> change tqp number.
> [Patch 6/17 - 9-17] add support vlan tag offload on tx&&rx direction
> for pf, and fix some related bugs.
> [patch 10/17 - 11/17] fix bugs for auto negotiation.
> [patch 12/17] adds support for ethtool command set_pauseparam.
> [patch 13/17 - 14/17] add support to update flow control settings after
> autoneg.
> [patch 15/17 - 17/17] fix some other bugs in net-next.

In your From: email field, as well as you and your colleagues signoffs
you are not using your full, real, name:

====================
From: Lipeng <lipeng321@huawei.com>
====================
Signed-off-by: qumingguang <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
====================

Please fix this.

^ permalink raw reply

* Re: [PATCH 00/19] pull request for net-next: batman-adv 2017-12-20
From: David Miller @ 2017-12-20 19:39 UTC (permalink / raw)
  To: sw; +Cc: netdev, b.a.t.m.a.n
In-Reply-To: <20171220110124.13117-1-sw@simonwunderlich.de>

From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Wed, 20 Dec 2017 12:01:05 +0100

> here is the updated feature/cleanup pull request of batman-adv to go
> into net-next. Compared to batman-adv 2017-12-15 pull request, we have 
> dropped the 'batman-adv: Add License-Filename to GPL-2.0 files' as per
> your suggestion.
> 
> Please pull or let me know of any problem!

Pulled, but there was a minor docbook annotation conflict I had to
address.

This pull added () to the function names in the docbook comments,
but this overlapped with the fix of a variable name in the docbook
info for the same function.

It was trivial, but please take a look.

Thanks.

^ permalink raw reply

* Re: [patch net-next 02/10] devlink: Add support for resource abstraction
From: David Miller @ 2017-12-20 19:43 UTC (permalink / raw)
  To: jiri
  Cc: netdev, arkadis, mlxsw, andrew, vivien.didelot, f.fainelli,
	michael.chan, ganeshgr, saeedm, matanb, leonro, idosch,
	jakub.kicinski, ast, daniel, simon.horman, pieter.jansenvanvuuren,
	john.hurley, alexander.h.duyck, linville, gospo, steven.lin1,
	yuvalm, ogerlitz, dsa, roopa
In-Reply-To: <20171220115821.22171-3-jiri@resnulli.us>

From: Jiri Pirko <jiri@resnulli.us>
Date: Wed, 20 Dec 2017 12:58:13 +0100

> From: Arkadi Sharshevsky <arkadis@mellanox.com>
> 
> Add support for hardware resource abstraction over devlink. Each resource
> is identified via id, furthermore it contains information regarding its
> size and its related sub resources. Each resource can also provide its
> current occupancy.
> 
> In some cases the sizes of some resources can be changed, yet for those
> changes to take place a hot driver reload may be needed. The reload
> capability will be introduced in the next patch.
> 
> Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

In what units are these sizes?  If it depends upon the resource, it would
be great to have a way to introspect the units given a resource.

> +	struct devlink_resource_ops *resource_ops;

Const?

> +static inline int
> +devlink_resource_register(struct devlink *devlink,
> +			  const char *resource_name,
> +			  bool top_hierarchy,
> +			  u64 resource_size,
> +			  u64 resource_id,
> +			  u64 parent_resource_id,
> +			  struct devlink_resource_ops *resource_ops)

Const for resource_ops?

> +int devlink_resource_register(struct devlink *devlink,
> +			      const char *resource_name,
> +			      bool top_hierarchy,
> +			      u64 resource_size,
> +			      u64 resource_id,
> +			      u64 parent_resource_id,
> +			      struct devlink_resource_ops *resource_ops)

Likewise.

^ permalink raw reply

* Re: [PATCH net-next] dev: Correctly get length of alias string in dev_set_alias()
From: Serhey Popovich @ 2017-12-20 19:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20171220.110529.947130105820119272.davem@davemloft.net>


[-- Attachment #1.1: Type: text/plain, Size: 1639 bytes --]

David Miller wrote:
> From: Serhey Popovych <serhe.popovych@gmail.com>
> Date: Mon, 18 Dec 2017 23:38:35 +0200
> 
>> We supply number of bytes available in @alias via @len
>> parameter to dev_set_alias() which is not the same
>> as zero terminated string length that can be shorter.
>>
>> Both dev_set_alias() users (rtnetlink and sysfs) can
>> submit number of bytes up to IFALIASZ with actual string
>> length slightly shorter by putting '\0' not at @len - 1.
>>
>> Use strnlen() to get length of zero terminated string
>> and not access beyond @len. Correct comment about @len
>> and explain how to unset alias (i.e. use zero for @len).
>>
>> Signed-off-by: Serhey Popovych <serhe.popovych@gmail.com>
> 
> I don't really see this as useful, really.
> 
> In the sysfs case, we are not presented with a NULL terminated string.
> Instead, the net sysfs code gives us a length that goes up until the
> trailing newline character.  The sysfs case is never larger than the
> actual string size + 1.
> 
> The netlink attribute is usually sized appropriately for whatever the
> string length actually is.

Sorry but I do not mean larger. I mean shorter. When nla_len() >
strlen() we allocate extra space up to IFALIASZ - 1.

This is definitely fix nothing: we never get above the bounds, but
in case if NULL terminator is in the middle of string with nla_len()
we might allocate unused extra space.

Sorry again if I'm not correct with above assumption.

> 
> This therefore just seems to add an new strnlen() unnecessarily to
> this code path, which rarely does anything helpful.
> 
> Thanks.
> 



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* Re: [for-next 01/11] net/mlx5: E-Switch, Refactor vport representors initialization
From: David Miller @ 2017-12-20 19:45 UTC (permalink / raw)
  To: markb-VPRAkNaXOzVWk0Htik3J/w
  Cc: saeedm-VPRAkNaXOzVWk0Htik3J/w, dledford-H+wXaHxf7aLQT0dZR+AlfA,
	netdev-u79uwXL29TY76Z2rM5mHXA, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	leonro-VPRAkNaXOzVWk0Htik3J/w
In-Reply-To: <066d5ac8-e95a-f4ec-fda7-31716242cc05-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Mark Bloch <markb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Date: Wed, 20 Dec 2017 21:21:51 +0200

> 
> 
> On 20/12/2017 19:57, David Miller wrote:
>> From: Saeed Mahameed <saeedm-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
>> Date: Tue, 19 Dec 2017 12:33:30 -0800
>> 
>>> +int esw_offloads_init_reps(struct mlx5_eswitch *esw)
>>> +{
>>> +	struct mlx5_core_dev *dev = esw->dev;
>>> +	struct mlx5_esw_offload *offloads;
>>> +	struct mlx5_eswitch_rep *rep;
>>> +	int total_vfs = MLX5_TOTAL_VPORTS(dev);
>>> +	u8 hw_id[ETH_ALEN];
>>> +	int vport;
>> 
>> Reverse christmas-tree please.
> we need dev for  MLX5_TOTAL_VPORTS, so we can't have proper reverse christms-tree,

I know that there is this dependency.

> I'll just remove the initialization of total_vfs and do it after the declarations,
> and switch between total_vfs and hw_id. 

Exactly.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next] dev: Correctly get length of alias string in dev_set_alias(),Re: [PATCH net-next] dev: Correctly get length of alias string in dev_set_alias()
From: David Miller @ 2017-12-20 19:55 UTC (permalink / raw)
  To: serhe.popovych; +Cc: netdev
In-Reply-To: <de0b307c-63e7-b786-e9d1-8b4a94f61d9a@gmail.com>

From: Serhey Popovich <serhe.popovych@gmail.com>
Date: Wed, 20 Dec 2017 21:44:46 +0200

> Sorry but I do not mean larger. I mean shorter. When nla_len() >
> strlen() we allocate extra space up to IFALIASZ - 1.

But the user typically does not do that.

> This is definitely fix nothing: we never get above the bounds, but
> in case if NULL terminator is in the middle of string with nla_len()
> we might allocate unused extra space.

See above, not worth optimizing for.

^ permalink raw reply

* Re: [net 1/1] tipc: remove joining group member from congested list
From: David Miller @ 2017-12-20 19:57 UTC (permalink / raw)
  To: jon.maloy
  Cc: netdev, mohan.krishna.ghanta.krishnamurthy, tung.q.nguyen,
	hoang.h.le, canh.d.luu, ying.xue, tipc-discussion
In-Reply-To: <1513764195-7731-1-git-send-email-jon.maloy@ericsson.com>

From: Jon Maloy <jon.maloy@ericsson.com>
Date: Wed, 20 Dec 2017 11:03:15 +0100

> When we receive a JOIN message from a peer member, the message may
> contain an advertised window value ADV_IDLE that permits removing the
> member in question from the tipc_group::congested list. However, since
> the removal has been made conditional on that the advertised window is
> *not* ADV_IDLE, we miss this case. This has the effect that a sender
> sometimes may enter a state of permanent, false, broadcast congestion.
> 
> We fix this by unconditinally removing the member from the congested
> list before calling tipc_member_update(), which might potentially sort
> it into the list again.
> 
> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>

I bet this bug wasn't fun to track down.

Applied, thanks Jon.

^ permalink raw reply

* Re: RCU callback crashes
From: Jiri Pirko @ 2017-12-20 19:59 UTC (permalink / raw)
  To: Cong Wang; +Cc: Jakub Kicinski, netdev@vger.kernel.org
In-Reply-To: <CAM_iQpWUjfv2-Sirmdb5WfV4pZ4uF0m7=HR5YGWaKxb4KHp8gQ@mail.gmail.com>

Wed, Dec 20, 2017 at 07:17:50PM CET, xiyou.wangcong@gmail.com wrote:
>On Tue, Dec 19, 2017 at 10:34 PM, Jakub Kicinski <kubakici@wp.pl> wrote:
>> Ah, no object debug but KASAN on produces this:
>>
>
>
>I bet it is an ingress qdisc which is being freed?
>
>
>
>> [   39.268209] BUG: KASAN: use-after-free in cpu_needs_another_gp+0x246/0x2b0
>> [   39.275965] Read of size 8 at addr ffff8803aa64f138 by task swapper/13/0
>> [   39.283524]
>> [   39.285256] CPU: 13 PID: 0 Comm: swapper/13 Not tainted 4.15.0-rc3-perf-00955-g1d0b01347dd5-dirty #8
>> [   39.295535] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.3.4 11/08/2016
>> [   39.303969] Call Trace:
>> [   39.306769]  <IRQ>
>> [   39.309088]  dump_stack+0xa6/0x118
>> [   39.312957]  ? _atomic_dec_and_lock+0xe8/0xe8
>> [   39.317895]  ? cpu_needs_another_gp+0x246/0x2b0
>> [   39.323030]  print_address_description+0x6a/0x270
>> [   39.328380]  ? cpu_needs_another_gp+0x246/0x2b0
>> [   39.333510]  kasan_report+0x23f/0x350
>> [   39.337672]  cpu_needs_another_gp+0x246/0x2b0
>> ...
>> [   39.383026]  rcu_process_callbacks+0x1a0/0x620
>> ...
>
>
>This is confusing.
>
>I guess it is q->miniqp which is freed in qdisc_graft() without properly
>waiting for rcu readers?

miniqp is inside qdisc private data:
struct ingress_sched_data {
        struct tcf_block *block;
        struct tcf_block_ext_info block_info;
        struct mini_Qdisc_pair miniqp;
};

That is freed along with the qdisc itself in:
qdisc_destroy->qdisc_free

Before miniq, tp was checked in the rcu reader path. In case it was not
null, q was processed. In slow patch, tp is freed after rcu grace period:
tcf_proto_destroy->kfree_rcu

I assumed that since q is processed in rcu reader, it is also freed after
a grace period, but now looking at the code I don't see it happening
like that.

So I think that change to miniq made the existing race window
a bit wider and easier to hit.

I believe that calling kfree_rcu by call_rcu should resolve this.

^ permalink raw reply

* Re: [patch net-next 02/10] devlink: Add support for resource abstraction
From: Jiri Pirko @ 2017-12-20 20:01 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, arkadis, mlxsw, andrew, vivien.didelot, f.fainelli,
	michael.chan, ganeshgr, saeedm, matanb, leonro, idosch,
	jakub.kicinski, ast, daniel, simon.horman, pieter.jansenvanvuuren,
	john.hurley, alexander.h.duyck, linville, gospo, steven.lin1,
	yuvalm, ogerlitz, dsa, roopa
In-Reply-To: <20171220.144358.1892414104907740492.davem@davemloft.net>

Wed, Dec 20, 2017 at 08:43:58PM CET, davem@davemloft.net wrote:
>From: Jiri Pirko <jiri@resnulli.us>
>Date: Wed, 20 Dec 2017 12:58:13 +0100
>
>> From: Arkadi Sharshevsky <arkadis@mellanox.com>
>> 
>> Add support for hardware resource abstraction over devlink. Each resource
>> is identified via id, furthermore it contains information regarding its
>> size and its related sub resources. Each resource can also provide its
>> current occupancy.
>> 
>> In some cases the sizes of some resources can be changed, yet for those
>> changes to take place a hot driver reload may be needed. The reload
>> capability will be introduced in the next patch.
>> 
>> Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>
>In what units are these sizes?  If it depends upon the resource, it would
>be great to have a way to introspect the units given a resource.

In this case the unit is "item". But you have a point. We'll figure this
out.


>
>> +	struct devlink_resource_ops *resource_ops;
>
>Const?
>
>> +static inline int
>> +devlink_resource_register(struct devlink *devlink,
>> +			  const char *resource_name,
>> +			  bool top_hierarchy,
>> +			  u64 resource_size,
>> +			  u64 resource_id,
>> +			  u64 parent_resource_id,
>> +			  struct devlink_resource_ops *resource_ops)
>
>Const for resource_ops?
>
>> +int devlink_resource_register(struct devlink *devlink,
>> +			      const char *resource_name,
>> +			      bool top_hierarchy,
>> +			      u64 resource_size,
>> +			      u64 resource_id,
>> +			      u64 parent_resource_id,
>> +			      struct devlink_resource_ops *resource_ops)
>
>Likewise.

Ok, thanks!

^ permalink raw reply

* Re: [patch net-next 00/10] Add support for resource abstraction
From: Jiri Pirko @ 2017-12-20 20:03 UTC (permalink / raw)
  To: David Ahern
  Cc: netdev, davem, arkadis, mlxsw, andrew, vivien.didelot, f.fainelli,
	michael.chan, ganeshgr, saeedm, matanb, leonro, idosch,
	jakub.kicinski, ast, daniel, simon.horman, pieter.jansenvanvuuren,
	john.hurley, alexander.h.duyck, linville, gospo, steven.lin1,
	yuvalm, ogerlitz, roopa
In-Reply-To: <e3533808-a00c-265a-554d-edab794895c3@cumulusnetworks.com>

Wed, Dec 20, 2017 at 08:23:54PM CET, dsa@cumulusnetworks.com wrote:
>On 12/20/17 4:58 AM, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> Many of the ASIC's internal resources are limited and are shared between
>> several hardware procedures. For example, unified hash-based memory can
>> be used for many lookup purposes, like FDB and LPM. In many cases the user
>> can provide a partitioning scheme for such a resource in order to perform
>> fine tuning for his application. In such cases performing driver reload is
>> needed for the changes to take place, thus this patchset also adds support
>> for hot reload.
>> 
>> Such an abstraction can be coupled with devlink's dpipe interface, which
>> models the ASIC's pipeline as a graph of match/action tables. By modeling
>> the hardware resource object, and by coupling it to several dpipe tables,
>> further visibility can be achieved in order to debug ASIC-wide issues.
>> 
>> The proposed interface will provide the user the ability to understand the
>> limitations of the hardware, and receive notification regarding its occupancy.
>> Furthermore, monitoring the resource occupancy can be done in real-time and
>> can be useful in many cases.
>> ---
>> Userspace part prototype can be found at https://github.com/arkadis/iproute2/
>> at resource_dev branch.
>> 
>
>The breakout problem I mentioned in the last round of patches has not
>been fixed:

Oops, I guess that we missed that. Thanks!

^ permalink raw reply

* Re: [PATCH net-next] net: packet: allow bind to device which is !IFF_UP
From: David Miller @ 2017-12-20 20:06 UTC (permalink / raw)
  To: cugyly; +Cc: netdev, Linyu.Yuan
In-Reply-To: <1513776049-9141-1-git-send-email-cugyly@163.com>

From: yuan linyu <cugyly@163.com>
Date: Wed, 20 Dec 2017 21:20:49 +0800

> From: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
> 
> this try to allow tcpdump to capture packet once device IFF_UP
> 
> Signed-off-by: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>

I don't think this is such a great idea.

Even if it "works":

1) The behavior goes back decades, this means real applications can
   be broken by this change.

2) The feedback is useful, if I ran tcpdump on the wrong interface
   I'd like to get this error if the interface I mistakenly chose
   was done in order to help me figure this out.

^ permalink raw reply

* [PATCH net v3] openvswitch: Fix pop_vlan action for double tagged frames
From: Eric Garver @ 2017-12-20 20:09 UTC (permalink / raw)
  To: netdev; +Cc: ovs-dev, Jiri Benc

skb_vlan_pop() expects skb->protocol to be a valid TPID for double
tagged frames. So set skb->protocol to the TPID and let skb_vlan_pop()
shift the true ethertype into position for us.

Fixes: 5108bbaddc37 ("openvswitch: add processing of L3 packets")
Signed-off-by: Eric Garver <e@erig.me>
---
 net/openvswitch/flow.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..f039064ce922 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 			return -EINVAL;
 
 		skb_reset_network_header(skb);
+		key->eth.type = skb->protocol;
 	} else {
 		eth = eth_hdr(skb);
 		ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 		if (unlikely(parse_vlan(skb, key)))
 			return -ENOMEM;
 
-		skb->protocol = parse_ethertype(skb);
-		if (unlikely(skb->protocol == htons(0)))
+		key->eth.type = parse_ethertype(skb);
+		if (unlikely(key->eth.type == htons(0)))
 			return -ENOMEM;
 
+		/* Multiple tagged packets need to retain TPID to satisfy
+		 * skb_vlan_pop(), which will later shift the ethertype into
+		 * skb->protocol.
+		 */
+		if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+			skb->protocol = key->eth.cvlan.tpid;
+		else
+			skb->protocol = key->eth.type;
+
 		skb_reset_network_header(skb);
 		__skb_push(skb, skb->data - skb_mac_header(skb));
 	}
 	skb_reset_mac_len(skb);
-	key->eth.type = skb->protocol;
 
 	/* Network layer. */
 	if (key->eth.type == htons(ETH_P_IP)) {
-- 
2.12.0

^ permalink raw reply related

* [PATCH] net: Revert "net_sched: no need to free qdisc in RCU callback"
From: John Fastabend @ 2017-12-20 20:09 UTC (permalink / raw)
  To: xiyou.wangcong, jiri, davem; +Cc: kubakici, netdev, eric.dumazet

RCU grace period is needed for lockless qdiscs added in the commit
c5ad119fb6c09 ("net: sched: pfifo_fast use skb_array").

It is needed now that qdiscs may be lockless otherwise we risk
free'ing a qdisc that is still in use from datapath. Additionally,
push list cleanup into RCU callback. Otherwise we risk the datapath
adding skbs during removal.

Fixes: c5ad119fb6c09 ("net: sched: pfifo_fast use skb_array")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/net/sch_generic.h |    1 +
 net/sched/sch_generic.c   |   50 ++++++++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bc6b25f..a65306b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -97,6 +97,7 @@ struct Qdisc {
 	unsigned long		state;
 	struct Qdisc            *next_sched;
 	struct sk_buff_head	skb_bad_txq;
+	struct rcu_head		rcu_head;
 	int			padded;
 	refcount_t		refcnt;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 876fab2..ab497ef 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -873,31 +873,15 @@ void qdisc_reset(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-static void qdisc_free(struct Qdisc *qdisc)
+static void qdisc_rcu_free(struct rcu_head *head)
 {
-	if (qdisc_is_percpu_stats(qdisc)) {
-		free_percpu(qdisc->cpu_bstats);
-		free_percpu(qdisc->cpu_qstats);
-	}
-
-	kfree((char *) qdisc - qdisc->padded);
-}
-
-void qdisc_destroy(struct Qdisc *qdisc)
-{
-	const struct Qdisc_ops  *ops = qdisc->ops;
+	struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
+	const struct Qdisc_ops *ops = qdisc->ops;
 	struct sk_buff *skb, *tmp;
 
-	if (qdisc->flags & TCQ_F_BUILTIN ||
-	    !refcount_dec_and_test(&qdisc->refcnt))
-		return;
-
-#ifdef CONFIG_NET_SCHED
-	qdisc_hash_del(qdisc);
-
-	qdisc_put_stab(rtnl_dereference(qdisc->stab));
-#endif
-	gen_kill_estimator(&qdisc->rate_est);
+	/* At this point no outstanding references to this Qdisc should
+	 * exist in the datapath so its safe to clean up skb lists, etc.
+	 */
 	if (ops->reset)
 		ops->reset(qdisc);
 	if (ops->destroy)
@@ -916,7 +900,27 @@ void qdisc_destroy(struct Qdisc *qdisc)
 		kfree_skb_list(skb);
 	}
 
-	qdisc_free(qdisc);
+	if (qdisc_is_percpu_stats(qdisc)) {
+		free_percpu(qdisc->cpu_bstats);
+		free_percpu(qdisc->cpu_qstats);
+	}
+
+	kfree((char *) qdisc - qdisc->padded);
+}
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+	    !refcount_dec_and_test(&qdisc->refcnt))
+		return;
+
+#ifdef CONFIG_NET_SCHED
+	qdisc_hash_del(qdisc);
+
+	qdisc_put_stab(rtnl_dereference(qdisc->stab));
+#endif
+	gen_kill_estimator(&qdisc->rate_est);
+	call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
 }
 EXPORT_SYMBOL(qdisc_destroy);
 

^ permalink raw reply related

* Re: [PATCH v2] net: ibm: emac: support RGMII-[RX|TX]ID phymode
From: David Miller @ 2017-12-20 20:10 UTC (permalink / raw)
  To: chunkeey; +Cc: netdev, andrew, christophe.jaillet
In-Reply-To: <20171220160201.17143-1-chunkeey@gmail.com>

From: Christian Lamparter <chunkeey@gmail.com>
Date: Wed, 20 Dec 2017 17:02:01 +0100

> diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
> index 5afcc27ceebb..8c6d2af7281b 100644
> --- a/drivers/net/ethernet/ibm/emac/emac.h
> +++ b/drivers/net/ethernet/ibm/emac/emac.h
> @@ -112,6 +112,9 @@ struct emac_regs {
>  #define PHY_MODE_RMII	PHY_INTERFACE_MODE_RMII
>  #define PHY_MODE_SMII	PHY_INTERFACE_MODE_SMII
>  #define PHY_MODE_RGMII	PHY_INTERFACE_MODE_RGMII
> +#define PHY_MODE_RGMII_ID	PHY_INTERFACE_MODE_RGMII_ID
> +#define PHY_MODE_RGMII_RXID	PHY_INTERFACE_MODE_RGMII_RXID
> +#define PHY_MODE_RGMII_TXID	PHY_INTERFACE_MODE_RGMII_TXID
>  #define PHY_MODE_TBI	PHY_INTERFACE_MODE_TBI
>  #define PHY_MODE_GMII	PHY_INTERFACE_MODE_GMII
>  #define PHY_MODE_RTBI	PHY_INTERFACE_MODE_RTBI

Why does this driver do all of this CPP macro aliasing?

It's really cruddy because anyone who now reads this code:

>  static inline int rgmii_valid_mode(int phy_mode)
>  {
> -	return  phy_mode == PHY_MODE_GMII ||
> +	return  phy_interface_mode_is_rgmii(phy_mode) ||
> +		phy_mode == PHY_MODE_GMII ||
>  		phy_mode == PHY_MODE_MII ||
> -		phy_mode == PHY_MODE_RGMII ||
>  		phy_mode == PHY_MODE_TBI ||
>  		phy_mode == PHY_MODE_RTBI;
>  }

will read this and say "Oh the function tests against these weird
PHY_MODE_* aliases, but the helper function phy_interface_mode_is_rgmii()
tests against PHY_INTERFACE_MODE_*, what is going on?"

I hate to do this to you, but please get rid of these confusing and
completely unnecessary PHY_MODE_* CPP aliases first, and just use the
proper PHY_INTERFACE_MODE_* values consistently.

Thank you.

^ permalink raw reply

* [PATCH net V3] net: reevalulate autoflowlabel setting after sysctl setting
From: Shaohua Li @ 2017-12-20 20:10 UTC (permalink / raw)
  To: netdev, davem
  Cc: Kernel Team, Shaohua Li, Martin KaFai Lau, Eric Dumazet,
	Tom Herbert

From: Shaohua Li <shli@fb.com>

sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2.
If sockopt doesn't set autoflowlabel, outcome packets from the hosts are
supposed to not include flowlabel. This is true for normal packet, but
not for reset packet.

The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if
we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't
changed, so the sock will keep the old behavior in terms of auto
flowlabel. Reset packet is suffering from this problem, because reset
packet is sent from a special control socket, which is created at boot
time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control
socket will always have its ipv6_pinfo.autoflowlabel set, even after
user set sysctl.ipv6.auto_flowlabels to 1, so reset packset will always
have flowlabel. Normal sock created before sysctl setting suffers from
the same issue. We can't even turn off autoflowlabel unless we kill all
socks in the hosts.

To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the
autoflowlabel setting from user, otherwise we always call
ip6_default_np_autolabel() which has the new settings of sysctl.

Note, this changes behavior a little bit. Before commit 42240901f7c4
(ipv6: Implement different admin modes for automatic flow labels), the
autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes,
existing connection will change autoflowlabel behavior. After that
commit, autoflowlabel behavior is sticky in the whole life of the sock.
With this patch, the behavior isn't sticky again.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Tom Herbert <tom@quantonium.net>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 include/linux/ipv6.h     |  3 ++-
 net/ipv6/af_inet6.c      |  1 -
 net/ipv6/ip6_output.c    | 12 ++++++++++--
 net/ipv6/ipv6_sockglue.c |  1 +
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb18c62..8415bf1 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
 						 * 100: prefer care-of address
 						 */
 				dontfrag:1,
-				autoflowlabel:1;
+				autoflowlabel:1,
+				autoflowlabel_set:1;
 	__u8			min_hopcount;
 	__u8			tclass;
 	__be32			rcv_flowinfo;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f712..c9441ca 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->autoflowlabel = ip6_default_np_autolabel(net);
 	np->repflow	= net->ipv6.sysctl.flowlabel_reflect;
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a41..f7dd51c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+	if (!np->autoflowlabel_set)
+		return ip6_default_np_autolabel(net);
+	else
+		return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 		hlimit = ip6_dst_hoplimit(dst);
 
 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-						     np->autoflowlabel, fl6));
+				ip6_autoflowlabel(net, np), fl6));
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
 	ip6_flow_hdr(hdr, v6_cork->tclass,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-					np->autoflowlabel, fl6));
+					ip6_autoflowlabel(net, np), fl6));
 	hdr->hop_limit = v6_cork->hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404fe..2d4680e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		break;
 	case IPV6_AUTOFLOWLABEL:
 		np->autoflowlabel = valbool;
+		np->autoflowlabel_set = 1;
 		retv = 0;
 		break;
 	case IPV6_RECVFRAGSIZE:
-- 
2.9.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox