Netdev List
 help / color / mirror / Atom feed
* [PATCH iproute2 v2 1/3] ss: allow AF_FAMILY constants >32
From: Stefan Hajnoczi @ 2017-10-06 15:48 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Jorgen Hansen, Dexuan Cui, Stefan Hajnoczi
In-Reply-To: <20171006154841.10495-1-stefanha@redhat.com>

Linux has more than 32 address families defined in <bits/socket.h>.  Use
a 64-bit type so all of them can be represented in the filter->families
bitmask.

It's easy to introduce bugs when using (1 << AF_FAMILY) because the
value is 32-bit.  This can produce incorrect results from bitmask
operations so introduce the FAMILY_MASK() macro to eliminate these bugs.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 misc/ss.c | 54 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/misc/ss.c b/misc/ss.c
index dd8dfaa4..005e781d 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -170,55 +170,57 @@ enum {
 struct filter {
 	int dbs;
 	int states;
-	int families;
+	uint64_t families;
 	struct ssfilter *f;
 	bool kill;
 };
 
+#define FAMILY_MASK(family) ((uint64_t)1 << (family))
+
 static const struct filter default_dbs[MAX_DB] = {
 	[TCP_DB] = {
 		.states   = SS_CONN,
-		.families = (1 << AF_INET) | (1 << AF_INET6),
+		.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
 	},
 	[DCCP_DB] = {
 		.states   = SS_CONN,
-		.families = (1 << AF_INET) | (1 << AF_INET6),
+		.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
 	},
 	[UDP_DB] = {
 		.states   = (1 << SS_ESTABLISHED),
-		.families = (1 << AF_INET) | (1 << AF_INET6),
+		.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
 	},
 	[RAW_DB] = {
 		.states   = (1 << SS_ESTABLISHED),
-		.families = (1 << AF_INET) | (1 << AF_INET6),
+		.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
 	},
 	[UNIX_DG_DB] = {
 		.states   = (1 << SS_CLOSE),
-		.families = (1 << AF_UNIX),
+		.families = FAMILY_MASK(AF_UNIX),
 	},
 	[UNIX_ST_DB] = {
 		.states   = SS_CONN,
-		.families = (1 << AF_UNIX),
+		.families = FAMILY_MASK(AF_UNIX),
 	},
 	[UNIX_SQ_DB] = {
 		.states   = SS_CONN,
-		.families = (1 << AF_UNIX),
+		.families = FAMILY_MASK(AF_UNIX),
 	},
 	[PACKET_DG_DB] = {
 		.states   = (1 << SS_CLOSE),
-		.families = (1 << AF_PACKET),
+		.families = FAMILY_MASK(AF_PACKET),
 	},
 	[PACKET_R_DB] = {
 		.states   = (1 << SS_CLOSE),
-		.families = (1 << AF_PACKET),
+		.families = FAMILY_MASK(AF_PACKET),
 	},
 	[NETLINK_DB] = {
 		.states   = (1 << SS_CLOSE),
-		.families = (1 << AF_NETLINK),
+		.families = FAMILY_MASK(AF_NETLINK),
 	},
 	[SCTP_DB] = {
 		.states   = SS_CONN,
-		.families = (1 << AF_INET) | (1 << AF_INET6),
+		.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
 	},
 };
 
@@ -258,14 +260,14 @@ static void filter_db_set(struct filter *f, int db)
 static void filter_af_set(struct filter *f, int af)
 {
 	f->states	   |= default_afs[af].states;
-	f->families	   |= 1 << af;
+	f->families	   |= FAMILY_MASK(af);
 	do_default	    = 0;
 	preferred_family    = af;
 }
 
 static int filter_af_get(struct filter *f, int af)
 {
-	return f->families & (1 << af);
+	return !!(f->families & FAMILY_MASK(af));
 }
 
 static void filter_default_dbs(struct filter *f)
@@ -302,7 +304,7 @@ static void filter_merge_defaults(struct filter *f)
 			f->families |= default_dbs[db].families;
 	}
 	for (af = 0; af < AF_MAX; af++) {
-		if (!(f->families & (1 << af)))
+		if (!(f->families & FAMILY_MASK(af)))
 			continue;
 
 		if (!(default_afs[af].dbs & f->dbs))
@@ -2608,7 +2610,7 @@ static int show_one_inet_sock(const struct sockaddr_nl *addr,
 	struct inet_diag_msg *r = NLMSG_DATA(h);
 	struct sockstat s = {};
 
-	if (!(diag_arg->f->families & (1 << r->idiag_family)))
+	if (!(diag_arg->f->families & FAMILY_MASK(r->idiag_family)))
 		return 0;
 
 	parse_diag_msg(h, &s);
@@ -2802,7 +2804,7 @@ static int tcp_show(struct filter *f)
 		return -1;
 	}
 
-	if (f->families & (1<<AF_INET)) {
+	if (f->families & FAMILY_MASK(AF_INET)) {
 		if ((fp = net_tcp_open()) == NULL)
 			goto outerr;
 
@@ -2812,7 +2814,7 @@ static int tcp_show(struct filter *f)
 		fclose(fp);
 	}
 
-	if ((f->families & (1<<AF_INET6)) &&
+	if ((f->families & FAMILY_MASK(AF_INET6)) &&
 	    (fp = net_tcp6_open()) != NULL) {
 		setbuffer(fp, buf, bufsize);
 		if (generic_record_read(fp, tcp_show_line, f, AF_INET6))
@@ -2911,7 +2913,7 @@ static int udp_show(struct filter *f)
 	    && inet_show_netlink(f, NULL, IPPROTO_UDP) == 0)
 		return 0;
 
-	if (f->families&(1<<AF_INET)) {
+	if (f->families&FAMILY_MASK(AF_INET)) {
 		if ((fp = net_udp_open()) == NULL)
 			goto outerr;
 		if (generic_record_read(fp, dgram_show_line, f, AF_INET))
@@ -2919,7 +2921,7 @@ static int udp_show(struct filter *f)
 		fclose(fp);
 	}
 
-	if ((f->families&(1<<AF_INET6)) &&
+	if ((f->families&FAMILY_MASK(AF_INET6)) &&
 	    (fp = net_udp6_open()) != NULL) {
 		if (generic_record_read(fp, dgram_show_line, f, AF_INET6))
 			goto outerr;
@@ -2951,7 +2953,7 @@ static int raw_show(struct filter *f)
 	    inet_show_netlink(f, NULL, IPPROTO_RAW) == 0)
 		return 0;
 
-	if (f->families&(1<<AF_INET)) {
+	if (f->families&FAMILY_MASK(AF_INET)) {
 		if ((fp = net_raw_open()) == NULL)
 			goto outerr;
 		if (generic_record_read(fp, dgram_show_line, f, AF_INET))
@@ -2959,7 +2961,7 @@ static int raw_show(struct filter *f)
 		fclose(fp);
 	}
 
-	if ((f->families&(1<<AF_INET6)) &&
+	if ((f->families&FAMILY_MASK(AF_INET6)) &&
 	    (fp = net_raw6_open()) != NULL) {
 		if (generic_record_read(fp, dgram_show_line, f, AF_INET6))
 			goto outerr;
@@ -3703,13 +3705,13 @@ static int handle_follow_request(struct filter *f)
 	int groups = 0;
 	struct rtnl_handle rth;
 
-	if (f->families & (1 << AF_INET) && f->dbs & (1 << TCP_DB))
+	if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << TCP_DB))
 		groups |= 1 << (SKNLGRP_INET_TCP_DESTROY - 1);
-	if (f->families & (1 << AF_INET) && f->dbs & (1 << UDP_DB))
+	if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << UDP_DB))
 		groups |= 1 << (SKNLGRP_INET_UDP_DESTROY - 1);
-	if (f->families & (1 << AF_INET6) && f->dbs & (1 << TCP_DB))
+	if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << TCP_DB))
 		groups |= 1 << (SKNLGRP_INET6_TCP_DESTROY - 1);
-	if (f->families & (1 << AF_INET6) && f->dbs & (1 << UDP_DB))
+	if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << UDP_DB))
 		groups |= 1 << (SKNLGRP_INET6_UDP_DESTROY - 1);
 
 	if (groups == 0)
-- 
2.13.6

^ permalink raw reply related

* [PATCH iproute2 v2 0/3] ss: add AF_VSOCK support
From: Stefan Hajnoczi @ 2017-10-06 15:48 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Jorgen Hansen, Dexuan Cui, Stefan Hajnoczi

v2:
 * Use uint64_t instead of __u64 for filter->families
 * Added reference to net-next commit that merged vsock_diag.ko

This patch series adds AF_VSOCK support to ss(8).  AF_VSOCK is a host<->guest
communications channel supported by VMware, KVM (virtio-vsock), and Hyper-V.

To dump AF_VSOCK sockets:

  $ ss --vsock

The vsock_diag.ko module has now been merged in the Linux net-next tree.  I
have verified that the <linux/vm_sockets_diag.h> header copy in this patch
series is in sync with Linux net-next.  See commit
5820299a271fd3dc9b1733e1e10cd7b983edd028 ("Merge branch 'VSOCK-sock_diag'").

Stefan Hajnoczi (3):
  ss: allow AF_FAMILY constants >32
  include: add <linux/vm_sockets_diag.h>
  ss: add AF_VSOCK support

 include/linux/vm_sockets_diag.h |  33 ++++++
 misc/ss.c                       | 238 +++++++++++++++++++++++++++++++++++-----
 man/man8/ss.8                   |   8 +-
 3 files changed, 249 insertions(+), 30 deletions(-)
 create mode 100644 include/linux/vm_sockets_diag.h

-- 
2.13.6

^ permalink raw reply

* Re: [net-next V4 PATCH 2/5] bpf: XDP_REDIRECT enable use of cpumap
From: Jesper Dangaard Brouer @ 2017-10-06 15:45 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: netdev, jakub.kicinski, Michael S. Tsirkin, pavel.odintsov,
	Jason Wang, mchan, John Fastabend, peter.waskiewicz.jr,
	Daniel Borkmann, Alexei Starovoitov, Andy Gospodarek, brouer
In-Reply-To: <20171006131748.75185f65@redhat.com>

On Fri, 6 Oct 2017 13:17:48 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> > > +static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
> > > +{
> > > +	switch (map->map_type) {
> > > +	case BPF_MAP_TYPE_DEVMAP:
> > > +		return __dev_map_lookup_elem(map, index);
> > > +	case BPF_MAP_TYPE_CPUMAP:
> > > +		return __cpu_map_lookup_elem(map, index);
> > > +	default:
> > > +		return NULL;
> > > +	}    
> > 
> > Should we just have a callback and instead of the above use
> > map->ptr_lookup_elem() (or however we name it) ... lot of it
> > is pretty much the same logic as with devmap.  
> 
> We could extend struct bpf_map *map with such a callback, I was just
> afraid that this would be too invasive.
> 
> Performance wise, I don't thinks will hurt too much.
> http://www.cipht.net/2017/10/03/are-jump-tables-always-fastest.html

Looking at the code, I would like to postpone this callback work until
after this patchset is merged.  As this work will also touch devmap +
sockmap.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH net-next 0/7] nfp: extend match and action for flower offload
From: Tom Herbert @ 2017-10-06 15:34 UTC (permalink / raw)
  To: Simon Horman
  Cc: David Miller, Jakub Kicinski, Linux Kernel Network Developers,
	oss-drivers
In-Reply-To: <1507278086-3102-1-git-send-email-simon.horman@netronome.com>

Simon,

Maybe a bit off topic, but I had the impression netronome would
support BPF so that filters could be programmed for arbitrary
protocols and fields. Is that true? If so, what is the relationship
between that functionality and these patches?

Thanks,
Tom

On Fri, Oct 6, 2017 at 1:21 AM, Simon Horman <simon.horman@netronome.com> wrote:
> Pieter says:
>
> This series extends flower offload match and action capabilities. It
> specifically adds offload capabilities for matching on MPLS, TTL, TOS
> and flow label. Furthermore offload capabilities for action have been
> expanded to include set ethernet, ipv4, ipv6, tcp and udp headers.
>
> Pieter Jansen van Vuuren (7):
>   nfp: add mpls match offloading support
>   nfp: add IPv4 ttl and tos match offloading support
>   nfp: add IPv6 ttl and tos match offloading support
>   nfp: add set ethernet header action flower offload
>   nfp: add set ipv4 header action flower offload
>   nfp: add set ipv6 source and destination address
>   nfp: add set tcp and udp header action flower offload
>
>  drivers/net/ethernet/netronome/nfp/flower/action.c | 245 +++++++++++++++++++++
>  drivers/net/ethernet/netronome/nfp/flower/cmsg.h   |  43 ++++
>  drivers/net/ethernet/netronome/nfp/flower/match.c  |  39 +++-
>  .../net/ethernet/netronome/nfp/flower/offload.c    |  20 +-
>  4 files changed, 324 insertions(+), 23 deletions(-)
>
> --
> 2.1.4
>

^ permalink raw reply

* [PATCH net-next,3/3] hv_netvsc: Update netvsc Document for TCP hash level setting
From: Haiyang Zhang @ 2017-10-06 15:33 UTC (permalink / raw)
  To: davem, netdev; +Cc: haiyangz, kys, sthemmin, olaf, vkuznets, linux-kernel
In-Reply-To: <20171006153359.8400-1-haiyangz@exchange.microsoft.com>

From: Haiyang Zhang <haiyangz@microsoft.com>

Update Documentation/networking/netvsc.txt for TCP hash level setting
and related info.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 Documentation/networking/netvsc.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt
index 93560fb1170a..92f5b31392fa 100644
--- a/Documentation/networking/netvsc.txt
+++ b/Documentation/networking/netvsc.txt
@@ -19,12 +19,12 @@ Features
 
   Receive Side Scaling
   --------------------
-  Hyper-V supports receive side scaling. For TCP, packets are
-  distributed among available queues based on IP address and port
+  Hyper-V supports receive side scaling. For TCP & UDP, packets can
+  be distributed among available queues based on IP address and port
   number.
 
-  For UDP, we can switch UDP hash level between L3 and L4 by ethtool
-  command. UDP over IPv4 and v6 can be set differently. The default
+  For TCP & UDP, we can switch hash level between L3 and L4 by ethtool
+  command. TCP/UDP over IPv4 and v6 can be set differently. The default
   hash level is L4. We currently only allow switching TX hash level
   from within the guests.
 
-- 
2.14.1

^ permalink raw reply related

* [PATCH net-next,2/3] hv_netvsc: Add ethtool handler to set and get TCP hash levels
From: Haiyang Zhang @ 2017-10-06 15:33 UTC (permalink / raw)
  To: davem, netdev; +Cc: haiyangz, kys, sthemmin, olaf, vkuznets, linux-kernel
In-Reply-To: <20171006153359.8400-1-haiyangz@exchange.microsoft.com>

From: Haiyang Zhang <haiyangz@microsoft.com>

The patch supports the options to switch TCP hash level between
L3 and L4 by ethtool command. TCP over IPv4 and v6 can be set
differently. The default hash level is L4. We currently only
allow switching TX hash level from within the guests.

For example, for TCP over IPv4 on eth0:
To include TCP port numbers in hashing:
	ethtool -N eth0 rx-flow-hash tcp4 sdfn
To exclude TCP port numbers in hashing:
	ethtool -N eth0 rx-flow-hash tcp4 sd
To show TCP hash level:
	ethtool -n eth0 rx-flow-hash tcp4

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 9bc7dbab9506..44746de3dd4c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1264,8 +1264,15 @@ netvsc_get_rss_hash_opts(struct net_device_context *ndc,
 
 	switch (info->flow_type) {
 	case TCP_V4_FLOW:
+		if (ndc->l4_hash & HV_TCP4_L4HASH)
+			info->data |= l4_flag;
+
+		break;
+
 	case TCP_V6_FLOW:
-		info->data |= l4_flag;
+		if (ndc->l4_hash & HV_TCP6_L4HASH)
+			info->data |= l4_flag;
+
 		break;
 
 	case UDP_V4_FLOW:
@@ -1318,6 +1325,14 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
 	if (info->data == (RXH_IP_SRC | RXH_IP_DST |
 			   RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
 		switch (info->flow_type) {
+		case TCP_V4_FLOW:
+			ndc->l4_hash |= HV_TCP4_L4HASH;
+			break;
+
+		case TCP_V6_FLOW:
+			ndc->l4_hash |= HV_TCP6_L4HASH;
+			break;
+
 		case UDP_V4_FLOW:
 			ndc->l4_hash |= HV_UDP4_L4HASH;
 			break;
@@ -1335,6 +1350,14 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
 
 	if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
 		switch (info->flow_type) {
+		case TCP_V4_FLOW:
+			ndc->l4_hash &= ~HV_TCP4_L4HASH;
+			break;
+
+		case TCP_V6_FLOW:
+			ndc->l4_hash &= ~HV_TCP6_L4HASH;
+			break;
+
 		case UDP_V4_FLOW:
 			ndc->l4_hash &= ~HV_UDP4_L4HASH;
 			break;
-- 
2.14.1

^ permalink raw reply related

* [PATCH net-next,1/3] hv_netvsc: Change the hash level variable to bit flags
From: Haiyang Zhang @ 2017-10-06 15:33 UTC (permalink / raw)
  To: davem, netdev; +Cc: haiyangz, kys, sthemmin, olaf, vkuznets, linux-kernel
In-Reply-To: <20171006153359.8400-1-haiyangz@exchange.microsoft.com>

From: Haiyang Zhang <haiyangz@microsoft.com>

This simplifies the logic and make it easier to add more
options.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h | 11 +++++--
 drivers/net/hyperv/netvsc_drv.c | 73 ++++++++++++++++++++++++++++-------------
 2 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 6f550e15a41c..a81335e8ebe8 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -704,6 +704,14 @@ struct netvsc_reconfig {
 	u32 event;
 };
 
+/* L4 hash bits for different protocols */
+#define HV_TCP4_L4HASH 1
+#define HV_TCP6_L4HASH 2
+#define HV_UDP4_L4HASH 4
+#define HV_UDP6_L4HASH 8
+#define HV_DEFAULT_L4HASH (HV_TCP4_L4HASH | HV_TCP6_L4HASH | HV_UDP4_L4HASH | \
+			   HV_UDP6_L4HASH)
+
 /* The context of the netvsc device  */
 struct net_device_context {
 	/* point back to our device context */
@@ -726,10 +734,9 @@ struct net_device_context {
 	u32 tx_send_table[VRSS_SEND_TAB_SIZE];
 
 	/* Ethtool settings */
-	bool udp4_l4_hash;
-	bool udp6_l4_hash;
 	u8 duplex;
 	u32 speed;
+	u32 l4_hash; /* L4 hash settings */
 	struct netvsc_ethtool_stats eth_stats;
 
 	/* State to manage the associated VF interface. */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dfb986421ec6..9bc7dbab9506 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -203,7 +203,7 @@ static inline u32 netvsc_get_hash(
 	const struct net_device_context *ndc)
 {
 	struct flow_keys flow;
-	u32 hash;
+	u32 hash, pkt_proto = 0;
 	static u32 hashrnd __read_mostly;
 
 	net_get_random_once(&hashrnd, sizeof(hashrnd));
@@ -211,11 +211,25 @@ static inline u32 netvsc_get_hash(
 	if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
 		return 0;
 
-	if (flow.basic.ip_proto == IPPROTO_TCP ||
-	    (flow.basic.ip_proto == IPPROTO_UDP &&
-	     ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
-	      (flow.basic.n_proto == htons(ETH_P_IPV6) &&
-	       ndc->udp6_l4_hash)))) {
+	switch (flow.basic.ip_proto) {
+	case IPPROTO_TCP:
+		if (flow.basic.n_proto == htons(ETH_P_IP))
+			pkt_proto = HV_TCP4_L4HASH;
+		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+			pkt_proto = HV_TCP6_L4HASH;
+
+		break;
+
+	case IPPROTO_UDP:
+		if (flow.basic.n_proto == htons(ETH_P_IP))
+			pkt_proto = HV_UDP4_L4HASH;
+		else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+			pkt_proto = HV_UDP6_L4HASH;
+
+		break;
+	}
+
+	if (pkt_proto & ndc->l4_hash) {
 		return skb_get_hash(skb);
 	} else {
 		if (flow.basic.n_proto == htons(ETH_P_IP))
@@ -898,8 +912,7 @@ static void netvsc_init_settings(struct net_device *dev)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
 
-	ndc->udp4_l4_hash = true;
-	ndc->udp6_l4_hash = true;
+	ndc->l4_hash = HV_DEFAULT_L4HASH;
 
 	ndc->speed = SPEED_UNKNOWN;
 	ndc->duplex = DUPLEX_FULL;
@@ -1245,23 +1258,25 @@ static int
 netvsc_get_rss_hash_opts(struct net_device_context *ndc,
 			 struct ethtool_rxnfc *info)
 {
+	const u32 l4_flag = RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
 	info->data = RXH_IP_SRC | RXH_IP_DST;
 
 	switch (info->flow_type) {
 	case TCP_V4_FLOW:
 	case TCP_V6_FLOW:
-		info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		info->data |= l4_flag;
 		break;
 
 	case UDP_V4_FLOW:
-		if (ndc->udp4_l4_hash)
-			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		if (ndc->l4_hash & HV_UDP4_L4HASH)
+			info->data |= l4_flag;
 
 		break;
 
 	case UDP_V6_FLOW:
-		if (ndc->udp6_l4_hash)
-			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		if (ndc->l4_hash & HV_UDP6_L4HASH)
+			info->data |= l4_flag;
 
 		break;
 
@@ -1302,23 +1317,35 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
 {
 	if (info->data == (RXH_IP_SRC | RXH_IP_DST |
 			   RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-		if (info->flow_type == UDP_V4_FLOW)
-			ndc->udp4_l4_hash = true;
-		else if (info->flow_type == UDP_V6_FLOW)
-			ndc->udp6_l4_hash = true;
-		else
+		switch (info->flow_type) {
+		case UDP_V4_FLOW:
+			ndc->l4_hash |= HV_UDP4_L4HASH;
+			break;
+
+		case UDP_V6_FLOW:
+			ndc->l4_hash |= HV_UDP6_L4HASH;
+			break;
+
+		default:
 			return -EOPNOTSUPP;
+		}
 
 		return 0;
 	}
 
 	if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
-		if (info->flow_type == UDP_V4_FLOW)
-			ndc->udp4_l4_hash = false;
-		else if (info->flow_type == UDP_V6_FLOW)
-			ndc->udp6_l4_hash = false;
-		else
+		switch (info->flow_type) {
+		case UDP_V4_FLOW:
+			ndc->l4_hash &= ~HV_UDP4_L4HASH;
+			break;
+
+		case UDP_V6_FLOW:
+			ndc->l4_hash &= ~HV_UDP6_L4HASH;
+			break;
+
+		default:
 			return -EOPNOTSUPP;
+		}
 
 		return 0;
 	}
-- 
2.14.1

^ permalink raw reply related

* [PATCH net-next,0/3] hv_netvsc: support changing TCP hash level
From: Haiyang Zhang @ 2017-10-06 15:33 UTC (permalink / raw)
  To: davem, netdev; +Cc: haiyangz, kys, sthemmin, olaf, vkuznets, linux-kernel

From: Haiyang Zhang <haiyangz@microsoft.com>

The patch set simplifies the existing hash level switching code for
UDP. It also adds the support for changing TCP hash level. So users
can switch between L3 an L4 hash levels for TCP and UDP.

Haiyang Zhang (3):
  hv_netvsc: Change the hash level variable to bit flags
  hv_netvsc: Add ethtool handler to set and get TCP hash levels
  hv_netvsc: Update netvsc Document for TCP hash level setting

 Documentation/networking/netvsc.txt |  8 ++--
 drivers/net/hyperv/hyperv_net.h     | 11 ++++-
 drivers/net/hyperv/netvsc_drv.c     | 96 ++++++++++++++++++++++++++++---------
 3 files changed, 86 insertions(+), 29 deletions(-)

-- 
2.14.1

^ permalink raw reply

* Re: [PATCH 4/4] tcp: avoid noref dst leak on input path
From: Eric Dumazet @ 2017-10-06 15:32 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Eric Dumazet, LKML, Paul E. McKenney, Josh Triplett,
	Steven Rostedt, David S. Miller, Hannes Frederic Sowa, netdev
In-Reply-To: <1507303300.2793.25.camel@redhat.com>

On Fri, Oct 6, 2017 at 8:21 AM, Paolo Abeni <pabeni@redhat.com> wrote:
> Hi,
>
> On Fri, 2017-10-06 at 07:37 -0700, Eric Dumazet wrote:
>> On Fri, 2017-10-06 at 14:57 +0200, Paolo Abeni wrote:
>> > Enabling CONFIG_RCU_NOREF_DEBUG gives the following splat when
>> > processing tcp packets:
>> >
>> >            to-be-untracked noref entity ffff942cb71ea300 not found in cache
>> >            ------------[ cut here ]------------
>> >            WARNING: CPU: 24 PID: 178 at kernel/rcu/noref_debug.c:54 rcu_track_noref+0xa4/0xf0
>> >            Modules linked in: intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd iTCO_wdt ipmi_ssif mei_me iTCO_vendor_support mei dcdbas lpc_ich ipmi_si mxm_wmi sg pcspkr ipmi_devintf ipmi_msghandler acpi_power_meter shpchp wmi nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm igb drm ixgbe mdio crc32c_intel ahci ptp i2c_algo_bit libahci pps_core i2c_core libata dca dm_mirror dm_region_hash dm_log dm_mod
>> >            CPU: 24 PID: 178 Comm: ksoftirqd/24 Not tainted 4.14.0-rc1.noref_route+ #1610
>> >            Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017
>> >            task: ffff940e48300000 task.stack: ffffaec406a20000
>> >            RIP: 0010:rcu_track_noref+0xa4/0xf0
>> >            RSP: 0018:ffffaec406a238e0 EFLAGS: 00010246
>> >            RAX: 0000000000000040 RBX: 0000000000000000 RCX: 0000000000000002
>> >            RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000292
>> >            RBP: ffffaec406a238e0 R08: 0000000000000000 R09: 0000000000000000

> Thank you for the feedback.
>
> I most probably messed-up while extracting the info from dmsg, as this
> issue gives a couple of splats almost concurrently. Please let me re-do
> the test and post a more resonable dmsg.
>
> The problem with the current code is that in the tcp_rcv_established()
> -> tcp_queue_rcv() path, the skb_dst() is not cleared.
>

In any case, I would rather put one skb_dst_drop() right after the
last possible use of skb dst in TCP stack,
probably after sk_rx_dst_set() call.

Trying to move it in multiple places has been error prone, even if
current code is not buggy.

^ permalink raw reply

* Re: [PATCH 4/4] tcp: avoid noref dst leak on input path
From: Paolo Abeni @ 2017-10-06 15:21 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: linux-kernel, Paul E. McKenney, Josh Triplett, Steven Rostedt,
	David S. Miller, Eric Dumazet, Hannes Frederic Sowa, netdev
In-Reply-To: <1507300642.14419.17.camel@edumazet-glaptop3.roam.corp.google.com>

Hi,

On Fri, 2017-10-06 at 07:37 -0700, Eric Dumazet wrote:
> On Fri, 2017-10-06 at 14:57 +0200, Paolo Abeni wrote:
> > Enabling CONFIG_RCU_NOREF_DEBUG gives the following splat when
> > processing tcp packets:
> > 
> >            to-be-untracked noref entity ffff942cb71ea300 not found in cache
> >            ------------[ cut here ]------------
> >            WARNING: CPU: 24 PID: 178 at kernel/rcu/noref_debug.c:54 rcu_track_noref+0xa4/0xf0
> >            Modules linked in: intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd iTCO_wdt ipmi_ssif mei_me iTCO_vendor_support mei dcdbas lpc_ich ipmi_si mxm_wmi sg pcspkr ipmi_devintf ipmi_msghandler acpi_power_meter shpchp wmi nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm igb drm ixgbe mdio crc32c_intel ahci ptp i2c_algo_bit libahci pps_core i2c_core libata dca dm_mirror dm_region_hash dm_log dm_mod
> >            CPU: 24 PID: 178 Comm: ksoftirqd/24 Not tainted 4.14.0-rc1.noref_route+ #1610
> >            Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017
> >            task: ffff940e48300000 task.stack: ffffaec406a20000
> >            RIP: 0010:rcu_track_noref+0xa4/0xf0
> >            RSP: 0018:ffffaec406a238e0 EFLAGS: 00010246
> >            RAX: 0000000000000040 RBX: 0000000000000000 RCX: 0000000000000002
> >            RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000292
> >            RBP: ffffaec406a238e0 R08: 0000000000000000 R09: 0000000000000000
> >            R10: 0000000000000001 R11: 0000000000000003 R12: 0000000000000000
> >            R13: ffff942cb5110000 R14: 000000000000fe88 R15: ffff942cb1a20200
> >            FS:  0000000000000000(0000) GS:ffff942cbee00000(0000) knlGS:0000000000000000
> >            CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >            CR2: 00007febc072d140 CR3: 0000001feebd6002 CR4: 00000000003606e0
> >            DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> >            DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> >            Call Trace:
> >             tcp_data_queue+0x82a/0xce0
> 
> That is strange, since tcp_data_queue() starts with
> 
> 	skb_dst_drop(skb); 
> 
> So this stack trace looks suspicious.

Thank you for the feedback.

I most probably messed-up while extracting the info from dmsg, as this
issue gives a couple of splats almost concurrently. Please let me re-do 
the test and post a more resonable dmsg.

The problem with the current code is that in the tcp_rcv_established()
-> tcp_queue_rcv() path, the skb_dst() is not cleared.

Thanks,

Paolo

^ permalink raw reply

* Re: [PATCH 0/4] RCU: introduce noref debug
From: Paolo Abeni @ 2017-10-06 15:10 UTC (permalink / raw)
  To: paulmck
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, David S. Miller,
	Eric Dumazet, Hannes Frederic Sowa, netdev
In-Reply-To: <20171006133436.GY3521@linux.vnet.ibm.com>

Hi,

On Fri, 2017-10-06 at 06:34 -0700, Paul E. McKenney wrote:
> On Fri, Oct 06, 2017 at 02:57:45PM +0200, Paolo Abeni wrote:
> > The networking subsystem is currently using some kind of long-lived
> > RCU-protected, references to avoid the overhead of full book-keeping.
> > 
> > Such references - skb_dst() noref - are stored inside the skbs and can be
> > moved across relevant slices of the network stack, with the users
> > being in charge of properly clearing the relevant skb - or properly refcount
> > the related dst references - before the skb escapes the RCU section.
> > 
> > We currently don't have any deterministic debug infrastructure to check
> > the dst noref usages - and the introduction of others noref artifact is
> > currently under discussion.
> > 
> > This series tries to tackle the above introducing an RCU debug infrastructure
> > aimed at spotting incorrect noref pointer usage, in patch one. The
> > infrastructure is small and must be explicitly enabled via a newly introduced
> > build option.
> > 
> > Patch two uses such infrastructure to track dst noref usage in the networking
> > stack.
> > 
> > Patch 3 and 4 are bugfixes for small buglet found running this infrastructure
> > on basic scenarios.

Thank you for the prompt reply!
> 
> This patchset does not look like it handles rcu_read_lock() nesting.
> For example, given code like this:
> 
> 	void foo(void)
> 	{
> 		rcu_read_lock();
> 		rcu_track_noref(&key2, &noref2, true);
> 		do_something();
> 		rcu_track_noref(&key2, &noref2, false);
> 		rcu_read_unlock();
> 	}
> 
> 	void bar(void)
> 	{
> 		rcu_read_lock();
> 		rcu_track_noref(&key1, &noref1, true);
> 		do_something_more();
> 		foo();
> 		do_something_else();
> 		rcu_track_noref(&key1, &noref1, false);
> 		rcu_read_unlock();
> 	}
> 
> 	void grill(void)
> 	{
> 		foo();
> 	}
> 
> It looks like foo()'s rcu_read_unlock() will complain about key1.
> You could remove foo()'s rcu_read_lock() and rcu_read_unlock(), but
> that will break the call from grill().

Actually the code should cope correctly with your example; when foo()'s
rcu_read_unlock() is called, 'cache' contains:

{ { &key1, &noref1, 1},  // ...

and when the related __rcu_check_noref() is invoked preempt_count() is
2 - because the check is called before decreasing the preempt counter.

In the main loop inside __rcu_check_noref() we will hit always the
'continue' statement because 'cache->store[i].nesting != nesting', so
no warn will be triggered.

> Or am I missing something subtle here?  Given patch 3/4, I suspect not...

The problem with the code in patch 3/4 is different; currently
ip_route_input_noref() is basically doing:

rcu_read_lock();

rcu_track_noref(&key1, &noref1, true);

rcu_read_unlock();

So the rcu lock there silence any RCU based check inside
ip_route_input_noref() but does not really protect the noref dst.

Please let me know if the above clarify the scenario.

Thanks,

Paolo

^ permalink raw reply

* [PATCH net] ppp: fix race in ppp device destruction
From: Guillaume Nault @ 2017-10-06 15:05 UTC (permalink / raw)
  To: netdev; +Cc: Beniamino Galvani, linux-ppp, Paul Mackerras, David Ahern,
	Gao Feng

ppp_release() tries to ensure that netdevices are unregistered before
decrementing the unit refcount and running ppp_destroy_interface().

This is all fine as long as the the device is unregistered by
ppp_release(): the unregister_netdevice() call, followed by
rtnl_unlock(), guarantee that the unregistration process completes
before rtnl_unlock() returns.

However, the device may be unregistered by other means (like
ppp_nl_dellink()). If this happens right before ppp_release() calling
rtnl_lock(), then ppp_release() has to wait for the concurrent
unregistration code to release the lock.
But rtnl_unlock() releases the lock before completing the device
unregistration process. This allows ppp_release() to proceed and
eventually call ppp_destroy_interface() before the unregistration
process completes. Calling free_netdev() on this partially unregistered
device will BUG():

 ------------[ cut here ]------------
 kernel BUG at net/core/dev.c:8141!
 invalid opcode: 0000 [#1] SMP

 CPU: 1 PID: 1557 Comm: pppd Not tainted 4.14.0-rc2+ #4
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014

 Call Trace:
  ppp_destroy_interface+0xd8/0xe0 [ppp_generic]
  ppp_disconnect_channel+0xda/0x110 [ppp_generic]
  ppp_unregister_channel+0x5e/0x110 [ppp_generic]
  pppox_unbind_sock+0x23/0x30 [pppox]
  pppoe_connect+0x130/0x440 [pppoe]
  SYSC_connect+0x98/0x110
  ? do_fcntl+0x2c0/0x5d0
  SyS_connect+0xe/0x10
  entry_SYSCALL_64_fastpath+0x1a/0xa5

 RIP: free_netdev+0x107/0x110 RSP: ffffc28a40573d88
 ---[ end trace ed294ff0cc40eeff ]---

We could set the ->needs_free_netdev flag on PPP devices and move the
ppp_destroy_interface() logic in the ->priv_destructor() callback. But
that'd be quite intrusive as we'd first need to unlink from the other
channels and units that depend on the device (the ones that used the
PPPIOCCONNECT and PPPIOCATTACH ioctls).

Instead, we can just let the netdevice hold a reference on its
ppp_file. This reference is dropped in ->priv_destructor(), at the very
end of the unregistration process, so that neither ppp_release() nor
ppp_disconnect_channel() can call ppp_destroy_interface() in the interim.

Reported-by: Beniamino Galvani <bgalvani@redhat.com>
Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
---
 drivers/net/ppp/ppp_generic.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index c3f77e3b7819..e365866600ba 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1339,7 +1339,17 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64)
 
 static int ppp_dev_init(struct net_device *dev)
 {
+	struct ppp *ppp;
+
 	netdev_lockdep_set_classes(dev);
+
+	ppp = netdev_priv(dev);
+	/* Let the netdevice take a reference on the ppp file. This ensures
+	 * that ppp_destroy_interface() won't run before the device gets
+	 * unregistered.
+	 */
+	atomic_inc(&ppp->file.refcnt);
+
 	return 0;
 }
 
@@ -1362,6 +1372,15 @@ static void ppp_dev_uninit(struct net_device *dev)
 	wake_up_interruptible(&ppp->file.rwait);
 }
 
+static void ppp_dev_priv_destructor(struct net_device *dev)
+{
+	struct ppp *ppp;
+
+	ppp = netdev_priv(dev);
+	if (atomic_dec_and_test(&ppp->file.refcnt))
+		ppp_destroy_interface(ppp);
+}
+
 static const struct net_device_ops ppp_netdev_ops = {
 	.ndo_init	 = ppp_dev_init,
 	.ndo_uninit      = ppp_dev_uninit,
@@ -1387,6 +1406,7 @@ static void ppp_setup(struct net_device *dev)
 	dev->tx_queue_len = 3;
 	dev->type = ARPHRD_PPP;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+	dev->priv_destructor = ppp_dev_priv_destructor;
 	netif_keep_dst(dev);
 }
 
-- 
2.14.2

^ permalink raw reply related

* [PATCH v2] mwifiex: Use put_unaligned_le32
From: Himanshu Jha @ 2017-10-06 15:04 UTC (permalink / raw)
  To: amitkarwar
  Cc: nishants, gbhat, huxm, kvalo, linux-wireless, netdev,
	linux-kernel, Himanshu Jha

Use put_unaligned_le32 rather than using byte ordering function and
memcpy which makes code clear.
Also, add the header file where it is declared.

Done using Coccinelle and semantic patch used is :

@ rule1 @
identifier tmp; expression ptr,x; type T;
@@

- tmp = cpu_to_le32(x);

  <+... when != tmp
- memcpy(ptr, (T)&tmp, ...);
+ put_unaligned_le32(x,ptr);
  ...+>

@ depends on rule1 @
type j; identifier tmp;
@@

- j tmp;
  ...when != tmp

Signed-off-by: Himanshu Jha <himanshujha199640@gmail.com>
---
v2:
* added correct header file.

 drivers/net/wireless/marvell/mwifiex/cmdevt.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 0edc5d6..e28e119 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -17,6 +17,7 @@
  * this warranty disclaimer.
  */
 
+#include <asm/unaligned.h>
 #include "decl.h"
 #include "ioctl.h"
 #include "util.h"
@@ -183,7 +184,6 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv,
 	uint16_t cmd_code;
 	uint16_t cmd_size;
 	unsigned long flags;
-	__le32 tmp;
 
 	if (!adapter || !cmd_node)
 		return -1;
@@ -249,9 +249,9 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv,
 	mwifiex_dbg_dump(adapter, CMD_D, "cmd buffer:", host_cmd, cmd_size);
 
 	if (adapter->iface_type == MWIFIEX_USB) {
-		tmp = cpu_to_le32(MWIFIEX_USB_TYPE_CMD);
 		skb_push(cmd_node->cmd_skb, MWIFIEX_TYPE_LEN);
-		memcpy(cmd_node->cmd_skb->data, &tmp, MWIFIEX_TYPE_LEN);
+		put_unaligned_le32(MWIFIEX_USB_TYPE_CMD,
+				   cmd_node->cmd_skb->data);
 		adapter->cmd_sent = true;
 		ret = adapter->if_ops.host_to_card(adapter,
 						   MWIFIEX_USB_EP_CMD_EVENT,
@@ -317,7 +317,6 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter)
 				(struct mwifiex_opt_sleep_confirm *)
 						adapter->sleep_cfm->data;
 	struct sk_buff *sleep_cfm_tmp;
-	__le32 tmp;
 
 	priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY);
 
@@ -342,8 +341,7 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter)
 				      + MWIFIEX_TYPE_LEN);
 		skb_put(sleep_cfm_tmp, sizeof(struct mwifiex_opt_sleep_confirm)
 			+ MWIFIEX_TYPE_LEN);
-		tmp = cpu_to_le32(MWIFIEX_USB_TYPE_CMD);
-		memcpy(sleep_cfm_tmp->data, &tmp, MWIFIEX_TYPE_LEN);
+		put_unaligned_le32(MWIFIEX_USB_TYPE_CMD, sleep_cfm_tmp->data);
 		memcpy(sleep_cfm_tmp->data + MWIFIEX_TYPE_LEN,
 		       adapter->sleep_cfm->data,
 		       sizeof(struct mwifiex_opt_sleep_confirm));
-- 
2.7.4

^ permalink raw reply related

* Re: [net-next V4 PATCH 1/5] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP
From: Daniel Borkmann @ 2017-10-06 14:52 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, jakub.kicinski, Michael S. Tsirkin, pavel.odintsov,
	Jason Wang, mchan, John Fastabend, peter.waskiewicz.jr,
	Daniel Borkmann, Alexei Starovoitov, Andy Gospodarek,
	Tobias Klauser
In-Reply-To: <20171006125008.676d5eaf@redhat.com>

On 10/06/2017 12:50 PM, Jesper Dangaard Brouer wrote:
> On Thu, 05 Oct 2017 11:40:15 +0200
> Daniel Borkmann <daniel@iogearbox.net> wrote:
>> On 10/04/2017 02:03 PM, Jesper Dangaard Brouer wrote:
>> [...]
>>> +#define CPU_MAP_BULK_SIZE 8  /* 8 == one cacheline on 64-bit archs */
>>> +struct xdp_bulk_queue {
>>> +	void *q[CPU_MAP_BULK_SIZE];
>>> +	unsigned int count;
>>> +};
>>> +
>>> +/* Struct for every remote "destination" CPU in map */
>>> +struct bpf_cpu_map_entry {
>>> +	u32 cpu;    /* kthread CPU and map index */
>>> +	int map_id; /* Back reference to map */
>>
>> map_id is not used here if I read it correctly? We should
>> then remove it.
>
> It is actually used in a later patch. Notice, there is no unused
> members in the final patch.  I did consider adding back in the later
> patch, but it was annoying to during the devel and split-up patch
> phase, as it creates conflicts when I move between the different
> patches, that need to modify this struct.  Thus, I choose to keep the
> end-struct in this cpumap-base-patch.  If you insist, I can go though
> the patch-stack and carefully introduce changes to the struct in steps?

It would be great to have every patch as self-contained as possible
since it otherwise makes reviewing much more time consuming to check
through the other patches for possible usage patterns, I noticed you
are using it for the tracepoints in patch 4/5 to dump map_id. Would
definitely be good if you could avoid such split in future sets.

[...]
>>> +void __cpu_map_queue_destructor(void *ptr)
>>> +{
>>> +	/* For now, just catch this as an error */
>>> +	if (!ptr)
>>> +		return;
>>> +	pr_err("ERROR: %s() cpu_map queue was not empty\n", __func__);
>>
>> Can you elaborate on this "for now" condition? Is this a race
>> when kthread doesn't consume queue on thread exit, or should it
>> be impossible to trigger (should it then be replaced with a
>> 'if (WARN_ON_ONCE(ptr)) page_frag_free(ptr)' and a more elaborate
>> comment)?
>
> The "for now" is an old comment while developing and testing this.
> In this final state of the patchset it _should_ not be possible to
> trigger this situation.  I like your idea of replacing it with a
> WARN_ON_ONCE.  (as it might be good to keep in some form, as it would
> catch is someone changing the code which breaks the RCU+WQ+kthread
> tear-down procedure).

Ok.

[...]
>>> +		/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
>>> +		rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
>>> +		if (!rcpu)
>>> +			return -ENOMEM;
>>> +	}
>>> +	rcu_read_lock();
>>> +	__cpu_map_entry_replace(cmap, key_cpu, rcpu);
>>> +	rcu_read_unlock();
>>> +	return 0;
>>
>> You need to update verifier such that this function cannot be called
>> out of an BPF program,
>
> In the example BPF program, I do a lookup into the map, but only to
> verify that an entry exist (I don't look at the value).  I would like
> to support such usage.

Ok, put comment below.

>> otherwise it would be possible under full RCU
>> read context, which is explicitly avoided here and also it would otherwise
>> be allowed for other maps of different type as well, which needs to
>> be avoided.
>
> Sorry, I don't follow this.

What I meant is that check_map_func_compatibility() should check for
BPF_MAP_TYPE_CPUMAP and only allow func_id of BPF_FUNC_redirect_map
and BPF_FUNC_map_lookup_elem to be used, which I haven't seen the set
restricting it to. Some of your later patches do this for the helper
BPF_FUNC_redirect_map but the important point is that map updates
wouldn't be done out of the BPF program itself, but rather from user
space control path given they can't be done under full RCU read lock
context if I read this correctly (which the programs run in though).

[...]
>>> +static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
>>> +{
>>> +	struct bpf_cpu_map_entry *rcpu =
>>> +		__cpu_map_lookup_elem(map, *(u32 *)key);
>>> +
>>> +	return rcpu ? &rcpu->qsize : NULL;
>>
>> The qsize doesn't seem used anywhere else besides here, but you
>> probably should update verifier such that this cannot be called
>> out of the BPF program, which could then mangle qsize value.
>
> It is true that the BPF prog can modify this qsize value, but it's not
> the authoritative value, so it doesn't really matter.
>
> As I said above, I do want to do a lookup from a BPF program.  To allow
> to BPF program to know, if an entry is valid, else it will blindly
> send to a cpu destination.  Maybe bpf_prog's just have to use a
> map-on-the-side to coordinate this(?), but then a sysadm modifying the
> real cpumap will be invisible to the program.
>
> Maybe we should just disable BPF-progs from reading this in the first
> iteration?  It would allow for more advanced usage schemes later..

Okay, what you could do is the following to prevent unintended value
updates. Just read out the qsize from the value and put that into a
per-cpu scratch buffer that gets returned instead of the map value,
so even if the scratch buffer gets mangled, it's okay because the
actual map value doesn't. Verifier still checks the map value access
bounds for that one.

> One crazy idea is to have the cpu_map_lookup_elem() return if any
> packets are in-flight on this cpu-queue. (Making it easier to avoid OoO
> packets, when switching target CPU, but it can also be implemented by
> the BPF-programmer herself via maps, although via some extra atomic
> cost).
>
>>> +}
>>> +
>>> +static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
>>> +{
>>> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
>>> +	u32 index = key ? *(u32 *)key : U32_MAX;
>>> +	u32 *next = next_key;
>>> +
>>> +	if (index >= cmap->map.max_entries) {
>>> +		*next = 0;
>>> +		return 0;
>>> +	}
>>> +
>>> +	if (index == cmap->map.max_entries - 1)
>>> +		return -ENOENT;
>>> +	*next = index + 1;
>>> +	return 0;
>>> +}
>
> I would have liked to have implemented next_key so it only returned the
> next valid cpu_entry, and used it as a simple round-robin scheduler.
> But AFAIK the next_key function is not allowed from bpf_prog's, right?

Hm, true we don't export this as a helper right now, I currently don't
see a reason why we couldn't. For the array map, the get_next_key is
probably not too useful given we only return key + 1. For user space it
might help for iterating/dumping the map though. Probably one could
enable it and add a flag to search the next valid entry from the map,
though issue could well be that this might need to iterate/test millions
of empty slots until you get to the next valid one which might not be
suitable in a generic case to do out of a BPF prog, perhaps a second
map with keys to round robin over might help.

Cheers,
Daniel

^ permalink raw reply

* Re: [PATCH 4/4] tcp: avoid noref dst leak on input path
From: Eric Dumazet @ 2017-10-06 14:37 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: linux-kernel, Paul E. McKenney, Josh Triplett, Steven Rostedt,
	David S. Miller, Eric Dumazet, Hannes Frederic Sowa, netdev
In-Reply-To: <77fd3b021d5a2f6dd4ad6b08e64f0643e3af95f4.1507294365.git.pabeni@redhat.com>

On Fri, 2017-10-06 at 14:57 +0200, Paolo Abeni wrote:
> Enabling CONFIG_RCU_NOREF_DEBUG gives the following splat when
> processing tcp packets:
> 
>            to-be-untracked noref entity ffff942cb71ea300 not found in cache
>            ------------[ cut here ]------------
>            WARNING: CPU: 24 PID: 178 at kernel/rcu/noref_debug.c:54 rcu_track_noref+0xa4/0xf0
>            Modules linked in: intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd iTCO_wdt ipmi_ssif mei_me iTCO_vendor_support mei dcdbas lpc_ich ipmi_si mxm_wmi sg pcspkr ipmi_devintf ipmi_msghandler acpi_power_meter shpchp wmi nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm igb drm ixgbe mdio crc32c_intel ahci ptp i2c_algo_bit libahci pps_core i2c_core libata dca dm_mirror dm_region_hash dm_log dm_mod
>            CPU: 24 PID: 178 Comm: ksoftirqd/24 Not tainted 4.14.0-rc1.noref_route+ #1610
>            Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017
>            task: ffff940e48300000 task.stack: ffffaec406a20000
>            RIP: 0010:rcu_track_noref+0xa4/0xf0
>            RSP: 0018:ffffaec406a238e0 EFLAGS: 00010246
>            RAX: 0000000000000040 RBX: 0000000000000000 RCX: 0000000000000002
>            RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000292
>            RBP: ffffaec406a238e0 R08: 0000000000000000 R09: 0000000000000000
>            R10: 0000000000000001 R11: 0000000000000003 R12: 0000000000000000
>            R13: ffff942cb5110000 R14: 000000000000fe88 R15: ffff942cb1a20200
>            FS:  0000000000000000(0000) GS:ffff942cbee00000(0000) knlGS:0000000000000000
>            CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>            CR2: 00007febc072d140 CR3: 0000001feebd6002 CR4: 00000000003606e0
>            DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>            DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>            Call Trace:
>             tcp_data_queue+0x82a/0xce0

That is strange, since tcp_data_queue() starts with

	skb_dst_drop(skb); 

So this stack trace looks suspicious.

>             tcp_rcv_established+0x283/0x570
>             tcp_v4_do_rcv+0x102/0x1e0
>             tcp_v4_rcv+0xa9e/0xc10
>             ip_local_deliver_finish+0x128/0x380
>             ? ip_local_deliver_finish+0x41/0x380
>             ip_local_deliver+0x74/0x230
>             ip_rcv_finish+0x105/0x5e0
>             ip_rcv+0x2a7/0x540
>             __netif_receive_skb_core+0x3b9/0xe10
>             ? netif_receive_skb_internal+0x40/0x390
>             __netif_receive_skb+0x18/0x60
>             netif_receive_skb_internal+0x8d/0x390
>             ? netif_receive_skb_internal+0x40/0x390
>             napi_gro_complete+0x127/0x1d0
>             ? napi_gro_complete+0x2a/0x1d0
>             napi_gro_flush+0x5f/0x80
>             napi_complete_done+0x96/0x100
>             ixgbe_poll+0x5f8/0x7c0 [ixgbe]
>             net_rx_action+0x27d/0x520
>             __do_softirq+0xd1/0x4f5
>             run_ksoftirqd+0x25/0x70
>             smpboot_thread_fn+0x11a/0x1f0
>             kthread+0x155/0x190
>             ? sort_range+0x30/0x30
>             ? kthread_create_on_node+0x70/0x70
>             ret_from_fork+0x2a/0x40
>            Code: e9 83 c2 01 48 83 c0 18 83 fa 07 75 ef 80 3d b0 e5 ff 00 00 75 d2 48 c7 c7 50 54 07 9d 31 c0 c6 05 9e e5 ff 00 01 e8 ef af fe ff <0f> ff 5d c3 80 3d 8f e5 ff 00 00 75 b0 48 c7 c7 00 54 07 9d 31
> 
> we must clear the skb dst before enqueuing the skb somewhere,
> but currently the dst entry for packets delivered
> via tcp_rcv_established() -> tcp_queue_rcv() is not cleared.
> 
> Fix it by adding the explicit drop in tcp_queue_rcv() and moving
> the current skb_dst_drop() just before the other enqueuing
> operation, do avoid unneeded double skb_dst_drop() for some
> path.
> 
> The leak itself is not harmful, because the tcp recvmsg() code
> should not access such info.
> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>  net/ipv4/tcp_input.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index c5d7656beeee..bf4e17edfe7a 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4422,6 +4422,9 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
>  		return;
>  	}
>  
> +	/* drop the -possibly noref - dst before delivery the skb to ofo tree */
> +	skb_dst_drop(skb);
> +
>  	/* Stash tstamp to avoid being stomped on by rbnode */
>  	if (TCP_SKB_CB(skb)->has_rxtstamp)
>  		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
> @@ -4560,6 +4563,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
>  				  skb, fragstolen)) ? 1 : 0;
>  	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
>  	if (!eaten) {
> +		skb_dst_drop(skb);
>  		__skb_queue_tail(&sk->sk_receive_queue, skb);
>  		skb_set_owner_r(skb, sk);
>  	}
> @@ -4626,7 +4630,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
>  		__kfree_skb(skb);
>  		return;
>  	}
> -	skb_dst_drop(skb);
>  	__skb_pull(skb, tcp_hdr(skb)->doff * 4);
>  
>  	tcp_ecn_accept_cwr(tp, skb);

^ permalink raw reply

* Re: [PATCH] net/mlx4_core: Convert timers to use timer_setup()
From: Kees Cook @ 2017-10-06 14:26 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: LKML, Tariq Toukan, Network Development, linux-rdma,
	Thomas Gleixner
In-Reply-To: <20171005093802.GK25829-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>

On Thu, Oct 5, 2017 at 2:38 AM, Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> On Wed, Oct 04, 2017 at 05:51:54PM -0700, Kees Cook wrote:
>> In preparation for unconditionally passing the struct timer_list pointer to
>> all timer callbacks, switch to using the new timer_setup() and from_timer()
>> to pass the timer pointer explicitly.
>>
>> Cc: Tariq Toukan <tariqt-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
>> Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Cc: Thomas Gleixner <tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org>
>> Signed-off-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
>> ---
>> This requires commit 686fef928bba ("timer: Prepare to change timer
>> callback argument type") in v4.14-rc3, but should be otherwise
>> stand-alone.
>> ---
>>  drivers/net/ethernet/mellanox/mlx4/catas.c | 10 ++++------
>>  1 file changed, 4 insertions(+), 6 deletions(-)
>>
>
> Hi Kees,
>
> In RDMA, we had very similar patch [1] to your patch series, but it converts to
> setup_timer, while you are converting to timer_setup.
>
> Which conversion is the right one?

The timer_setup() is the new API designed to eliminate the .data field
and pass in the struct timer_list to callbacks. (Please take the
timer_setup() change instead.)

Thanks!

-Kees

>
> [1] https://patchwork.kernel.org/patch/9980701/
>
> Thanks
>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
>> index 53daa6ca5d83..e2b6b0cac1ac 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/catas.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
>> @@ -231,10 +231,10 @@ static void dump_err_buf(struct mlx4_dev *dev)
>>                        i, swab32(readl(priv->catas_err.map + i)));
>>  }
>>
>> -static void poll_catas(unsigned long dev_ptr)
>> +static void poll_catas(struct timer_list *t)
>>  {
>> -     struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
>> -     struct mlx4_priv *priv = mlx4_priv(dev);
>> +     struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer);
>> +     struct mlx4_dev *dev = &priv->dev;
>>       u32 slave_read;
>>
>>       if (mlx4_is_slave(dev)) {
>> @@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
>>       phys_addr_t addr;
>>
>>       INIT_LIST_HEAD(&priv->catas_err.list);
>> -     init_timer(&priv->catas_err.timer);
>> +     timer_setup(&priv->catas_err.timer, poll_catas, 0);
>>       priv->catas_err.map = NULL;
>>
>>       if (!mlx4_is_slave(dev)) {
>> @@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
>>               }
>>       }
>>
>> -     priv->catas_err.timer.data     = (unsigned long) dev;
>> -     priv->catas_err.timer.function = poll_catas;
>>       priv->catas_err.timer.expires  =
>>               round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
>>       add_timer(&priv->catas_err.timer);
>> --
>> 2.7.4
>>
>>
>> --
>> Kees Cook
>> Pixel Security
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Kees Cook
Pixel Security
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 1/4] rcu: introduce noref debug
From: Steven Rostedt @ 2017-10-06 14:13 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: linux-kernel, Paul E. McKenney, Josh Triplett, David S. Miller,
	Eric Dumazet, Hannes Frederic Sowa, netdev
In-Reply-To: <bc39fa36c0765e6d2f7dab82e4c6460989669957.1507294365.git.pabeni@redhat.com>

On Fri,  6 Oct 2017 14:57:46 +0200
Paolo Abeni <pabeni@redhat.com> wrote:

> We currently lack a debugging infrastructure to ensure that
> long-lived noref dst are properly handled - namely dropped
> or converted to a refcounted version before escaping the current
> RCU section.
> 
> This changeset implements such infra, tracking the noref pointer
> on a per CPU store and checking that all noref tracked at any
> given RCU nesting level are cleared before leaving such RCU
> section.
> 
> Each noref entity is identified using the entity pointer and an
> additional, optional, key/opaque pointer. This is needed to cope
> with usage case scenario where the same noref entity is stored
> in different places (e.g. noref dst on skb_clone()).
> 
> To keep the patch/implementation simple RCU_NOREF_DEBUG depends
> on PREEMPT_RCU=n in Kconfig.
> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>  include/linux/rcupdate.h | 11 ++++++
>  kernel/rcu/Kconfig.debug | 15 ++++++++
>  kernel/rcu/Makefile      |  1 +
>  kernel/rcu/noref_debug.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 116 insertions(+)
>  create mode 100644 kernel/rcu/noref_debug.c
> 
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index de50d8a4cf41..20c1ce08e3eb 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -60,6 +60,15 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
>  void synchronize_sched(void);
>  void rcu_barrier_tasks(void);
>  
> +#ifdef CONFIG_RCU_NOREF_DEBUG
> +void rcu_track_noref(void *key, void *noref, bool track);
> +void __rcu_check_noref(void);
> +
> +#else
> +static inline void rcu_track_noref(void *key, void *noref, bool add) { }
> +static inline void __rcu_check_noref(void) { }
> +#endif
> +
>  #ifdef CONFIG_PREEMPT_RCU
>  
>  void __rcu_read_lock(void);
> @@ -85,6 +94,7 @@ static inline void __rcu_read_lock(void)
>  
>  static inline void __rcu_read_unlock(void)
>  {
> +	__rcu_check_noref();
>  	if (IS_ENABLED(CONFIG_PREEMPT_COUNT))
>  		preempt_enable();
>  }
> @@ -723,6 +733,7 @@ static inline void rcu_read_unlock_bh(void)
>  			 "rcu_read_unlock_bh() used illegally while idle");
>  	rcu_lock_release(&rcu_bh_lock_map);
>  	__release(RCU_BH);
> +	__rcu_check_noref();
>  	local_bh_enable();
>  }
>  
> diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
> index 0ec7d1d33a14..6c7f52a3e809 100644
> --- a/kernel/rcu/Kconfig.debug
> +++ b/kernel/rcu/Kconfig.debug
> @@ -68,6 +68,21 @@ config RCU_TRACE
>  	  Say Y here if you want to enable RCU tracing
>  	  Say N if you are unsure.
>  
> +config RCU_NOREF_DEBUG
> +	bool "Debugging for RCU-protected noref entries"
> +	depends on PREEMPT_RCU=n
> +	select PREEMPT_COUNT
> +	default n
> +	help
> +	  In case of long lasting rcu_read_lock sections this debug
> +	  feature enables one to ensure that no rcu managed dereferenced
> +	  data leaves the locked section. One use case is the tracking
> +	  of dst_entries in struct sk_buff ->dst, which is used to pass
> +	  the dst_entry around in the whole stack while under RCU lock.
> +
> +	  Say Y here if you want to enable noref RCU debugging
> +	  Say N if you are unsure.
> +
>  config RCU_EQS_DEBUG
>  	bool "Provide debugging asserts for adding NO_HZ support to an arch"
>  	depends on DEBUG_KERNEL
> diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
> index 13c0fc852767..c67d7c65c582 100644
> --- a/kernel/rcu/Makefile
> +++ b/kernel/rcu/Makefile
> @@ -11,3 +11,4 @@ obj-$(CONFIG_TREE_RCU) += tree.o
>  obj-$(CONFIG_PREEMPT_RCU) += tree.o
>  obj-$(CONFIG_TINY_RCU) += tiny.o
>  obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o
> +obj-$(CONFIG_RCU_NOREF_DEBUG) += noref_debug.o
> \ No newline at end of file
> diff --git a/kernel/rcu/noref_debug.c b/kernel/rcu/noref_debug.c
> new file mode 100644
> index 000000000000..ae2e104b11d6
> --- /dev/null
> +++ b/kernel/rcu/noref_debug.c
> @@ -0,0 +1,89 @@
> +#include <linux/bug.h>
> +#include <linux/percpu.h>
> +#include <linux/skbuff.h>
> +#include <linux/bitops.h>
> +
> +#define NOREF_MAX 7
> +struct noref_entry {
> +	void *noref;
> +	void *key;
> +	int nesting;
> +};
> +
> +struct noref_cache {
> +	struct noref_entry store[NOREF_MAX];
> +};
> +
> +static DEFINE_PER_CPU(struct noref_cache, per_cpu_noref_cache);
> +
> +static int noref_cache_lookup(struct noref_cache *cache, void *noref, void *key)
> +{
> +	int i;
> +
> +	for (i = 0; i < NOREF_MAX; ++i)
> +		if (cache->store[i].noref == noref &&
> +		    cache->store[i].key == key)
> +			return i;
> +
> +	return -1;
> +}
> +

Please add a comment above this function on how to use it.

-- Steve

> +void rcu_track_noref(void *key, void *noref, bool track)
> +{
> +	struct noref_cache *cache = this_cpu_ptr(&per_cpu_noref_cache);
> +	int i;
> +
> +	if (track) {
> +		/* find the first empty slot */
> +		i = noref_cache_lookup(cache, NULL, 0);
> +		if (i < 0) {
> +			WARN_ONCE(1, "can't find empty an slot to track a noref"
> +				  " noref tracking will be inaccurate");
> +			return;
> +		}
> +
> +		cache->store[i].noref = noref;
> +		cache->store[i].key = key;
> +		cache->store[i].nesting = preempt_count();
> +		return;
> +	}
> +
> +	i = noref_cache_lookup(cache, noref, key);
> +	if (i == -1) {
> +		WARN_ONCE(1, "to-be-untracked noref entity %p not found in "
> +			  "cache\n", noref);
> +		return;
> +	}
> +
> +	cache->store[i].noref = NULL;
> +	cache->store[i].key = NULL;
> +	cache->store[i].nesting = 0;
> +}
> +EXPORT_SYMBOL_GPL(rcu_track_noref);
> +
> +void __rcu_check_noref(void)
> +{
> +	struct noref_cache *cache = this_cpu_ptr(&per_cpu_noref_cache);
> +	char *cur, buf[strlen("0xffffffffffffffff") * NOREF_MAX + 1];
> +	int nesting = preempt_count();
> +	int i, ret, cnt = 0;
> +
> +	cur = buf;
> +	for (i = 0; i < NOREF_MAX; ++i) {
> +		if (!cache->store[i].noref ||
> +		    cache->store[i].nesting != nesting)
> +			continue;
> +
> +		cnt++;
> +		ret = sprintf(cur, " %p", cache->store[i].noref);
> +		if (ret >= 0)
> +			cur += ret;
> +		cache->store[i].noref = NULL;
> +		cache->store[i].key = NULL;
> +		cache->store[i].nesting = 0;
> +	}
> +
> +	WARN_ONCE(cnt, "%d noref entities escaped an RCU section, "
> +		  "nesting %d, leaked noref list %s", cnt, nesting, buf);
> +}
> +EXPORT_SYMBOL_GPL(__rcu_check_noref);

^ permalink raw reply

* [PATCH 2/4] batman-adv: Remove unnecessary parentheses
From: Simon Wunderlich @ 2017-10-06 13:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Simon Wunderlich
In-Reply-To: <20171006135437.26736-1-sw@simonwunderlich.de>

From: Sven Eckelmann <sven@narfation.org>

checkpatch introduced with commit 63b7c73ec86b ("checkpatch: add --strict
check for ifs with unnecessary parentheses") an additional test which
identifies some unnecessary parentheses.

Remove these unnecessary parentheses to avoid the warnings and to unify the
coding style slightly more.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bat_iv_ogm.c            | 24 ++++++++++++------------
 net/batman-adv/bat_v.c                 |  2 +-
 net/batman-adv/bat_v_elp.c             |  6 +++---
 net/batman-adv/bat_v_ogm.c             | 12 ++++++------
 net/batman-adv/distributed-arp-table.c |  4 ++--
 net/batman-adv/gateway_client.c        |  8 ++++----
 net/batman-adv/gateway_common.c        | 18 +++++++++---------
 net/batman-adv/hard-interface.c        | 12 ++++++------
 net/batman-adv/icmp_socket.c           |  4 ++--
 net/batman-adv/main.c                  |  4 ++--
 net/batman-adv/multicast.c             |  2 +-
 net/batman-adv/originator.c            | 26 +++++++++++++-------------
 net/batman-adv/routing.c               |  6 +++---
 net/batman-adv/send.c                  |  6 +++---
 net/batman-adv/soft-interface.c        |  2 +-
 net/batman-adv/sysfs.c                 |  4 ++--
 net/batman-adv/tp_meter.c              |  2 +-
 17 files changed, 71 insertions(+), 71 deletions(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 83ba5483455a..1b659ab652fb 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -916,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
 	u16 tvlv_len = 0;
 	unsigned long send_time;
 
-	if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
-	    (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+	if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+	    hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
 		return;
 
 	/* the interface gets activated here to avoid race conditions between
@@ -1264,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	 * drops as they can't send and receive at the same time.
 	 */
 	tq_iface_penalty = BATADV_TQ_MAX_VALUE;
-	if (if_outgoing && (if_incoming == if_outgoing) &&
+	if (if_outgoing && if_incoming == if_outgoing &&
 	    batadv_is_wifi_hardif(if_outgoing))
 		tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
 						      bat_priv);
@@ -1369,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 				ret = BATADV_NEIGH_DUP;
 		} else {
 			set_mark = 0;
-			if (is_dup && (ret != BATADV_NEIGH_DUP))
+			if (is_dup && ret != BATADV_NEIGH_DUP)
 				ret = BATADV_ORIG_DUP;
 		}
 
@@ -1515,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 	/* drop packet if sender is not a direct neighbor and if we
 	 * don't route towards it
 	 */
-	if (!is_single_hop_neigh && (!orig_neigh_router)) {
+	if (!is_single_hop_neigh && !orig_neigh_router) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Drop packet: OGM via unknown neighbor!\n");
 		goto out_neigh;
@@ -1535,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 	sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
 	similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
 
-	if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+	if (is_bidirect && (dup_status == BATADV_NO_DUP ||
 			    (sameseq && similar_ttl))) {
 		batadv_iv_ogm_orig_update(bat_priv, orig_node,
 					  orig_ifinfo, ethhdr,
@@ -1553,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 		/* OGMs from secondary interfaces should only scheduled once
 		 * per interface where it has been received, not multiple times
 		 */
-		if ((ogm_packet->ttl <= 2) &&
-		    (if_incoming != if_outgoing)) {
+		if (ogm_packet->ttl <= 2 &&
+		    if_incoming != if_outgoing) {
 			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 				   "Drop packet: OGM from secondary interface and wrong outgoing interface\n");
 			goto out_neigh;
@@ -1590,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 			      if_incoming, if_outgoing);
 
 out_neigh:
-	if ((orig_neigh_node) && (!is_single_hop_neigh))
+	if (orig_neigh_node && !is_single_hop_neigh)
 		batadv_orig_node_put(orig_neigh_node);
 out:
 	if (router_ifinfo)
@@ -2523,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 			tmp_gw_factor *= 100 * 100;
 			tmp_gw_factor >>= 18;
 
-			if ((tmp_gw_factor > max_gw_factor) ||
-			    ((tmp_gw_factor == max_gw_factor) &&
-			     (tq_avg > max_tq))) {
+			if (tmp_gw_factor > max_gw_factor ||
+			    (tmp_gw_factor == max_gw_factor &&
+			     tq_avg > max_tq)) {
 				if (curr_gw)
 					batadv_gw_node_put(curr_gw);
 				curr_gw = gw_node;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4e2724c5b33d..93ef1c06227e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -767,7 +767,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 		if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
 			goto next;
 
-		if (curr_gw && (bw <= max_bw))
+		if (curr_gw && bw <= max_bw)
 			goto next;
 
 		if (curr_gw)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index bd1064d98e16..1de992c58b35 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
 			hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
 
 		throughput = link_settings.base.speed;
-		if (throughput && (throughput != SPEED_UNKNOWN))
+		if (throughput && throughput != SPEED_UNKNOWN)
 			return throughput * 10;
 	}
 
@@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
 		goto out;
 
 	/* we are in the process of shutting this interface down */
-	if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
-	    (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+	if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+	    hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
 		goto out;
 
 	/* the interface was enabled but may not be ready yet */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 8be61734fc43..c251445a42a0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
 	 * due to the store & forward characteristics of WIFI.
 	 * Very low throughput values are the exception.
 	 */
-	if ((throughput > 10) &&
-	    (if_incoming == if_outgoing) &&
+	if (throughput > 10 &&
+	    if_incoming == if_outgoing &&
 	    !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
 		return throughput / 2;
 
@@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
 	/* drop packets with old seqnos, however accept the first packet after
 	 * a host has been rebooted.
 	 */
-	if ((seq_diff < 0) && !protection_started)
+	if (seq_diff < 0 && !protection_started)
 		goto out;
 
 	neigh_node->last_seen = jiffies;
@@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
 		router_throughput = router_ifinfo->bat_v.throughput;
 		neigh_throughput = neigh_ifinfo->bat_v.throughput;
 
-		if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
-		    (router_throughput >= neigh_throughput))
+		if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF &&
+		    router_throughput >= neigh_throughput)
 			goto out;
 	}
 
@@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
 		return;
 
 	/* only unknown & newer OGMs contain TVLVs we are interested in */
-	if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
+	if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
 		batadv_tvlv_containers_process(bat_priv, true, orig_node,
 					       NULL, NULL,
 					       (unsigned char *)(ogm2 + 1),
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b6cfa78e9381..760c0de72582 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
 	/* this is an hash collision with the temporary selected node. Choose
 	 * the one with the lowest address
 	 */
-	if ((tmp_max == max) && max_orig_node &&
-	    (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
+	if (tmp_max == max && max_orig_node &&
+	    batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
 		goto out;
 
 	ret = true;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de9955d5224d..10d521f0b17f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
 		}
 	}
 
-	if ((curr_gw) && (!next_gw)) {
+	if (curr_gw && !next_gw) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Removing selected gateway - no gateway in range\n");
 		batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL,
 				    NULL);
-	} else if ((!curr_gw) && (next_gw)) {
+	} else if (!curr_gw && next_gw) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
 			   next_gw->orig_node->orig,
@@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
 		goto out;
 	}
 
-	if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
-	    (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
+	if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) &&
+	    gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
 		goto out;
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 33940c5c74a8..2c26039c23fc 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
 		if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
 			bw_unit_type = BATADV_BW_UNIT_MBIT;
 
-		if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) ||
-		    (bw_unit_type == BATADV_BW_UNIT_MBIT))
+		if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
+		    bw_unit_type == BATADV_BW_UNIT_MBIT)
 			*tmp_ptr = '\0';
 	}
 
@@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
 	if (!up_new)
 		up_new = 1;
 
-	if ((down_curr == down_new) && (up_curr == up_new))
+	if (down_curr == down_new && up_curr == up_new)
 		return count;
 
 	batadv_gw_reselect(bat_priv);
@@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 	/* only fetch the tvlv value if the handler wasn't called via the
 	 * CIFNOTFND flag and if there is data to fetch
 	 */
-	if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) ||
-	    (tvlv_value_len < sizeof(gateway))) {
+	if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND ||
+	    tvlv_value_len < sizeof(gateway)) {
 		gateway.bandwidth_down = 0;
 		gateway.bandwidth_up = 0;
 	} else {
 		gateway_ptr = tvlv_value;
 		gateway.bandwidth_down = gateway_ptr->bandwidth_down;
 		gateway.bandwidth_up = gateway_ptr->bandwidth_up;
-		if ((gateway.bandwidth_down == 0) ||
-		    (gateway.bandwidth_up == 0)) {
+		if (gateway.bandwidth_down == 0 ||
+		    gateway.bandwidth_up == 0) {
 			gateway.bandwidth_down = 0;
 			gateway.bandwidth_up = 0;
 		}
@@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 	batadv_gw_node_update(bat_priv, orig, &gateway);
 
 	/* restart gateway selection */
-	if ((gateway.bandwidth_down != 0) &&
-	    (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
+	if (gateway.bandwidth_down != 0 &&
+	    atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)
 		batadv_gw_check_election(bat_priv, orig);
 }
 
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76ea8c1..d4aa99c060f9 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
-		if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-		    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+		if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+		    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
 			continue;
 
 		if (hard_iface->net_dev == net_dev)
@@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
-		if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-		    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+		if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+		    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
 			continue;
 
 		if (hard_iface->soft_iface != soft_iface)
@@ -654,8 +654,8 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
 static void
 batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
 {
-	if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-	    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+	if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+	    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
 		return;
 
 	hard_iface->if_status = BATADV_IF_INACTIVE;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 8ead292886d1..bded31121d12 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
 	size_t packet_len;
 	int error;
 
-	if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
+	if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0)
 		return -EAGAIN;
 
-	if ((!buf) || (count < sizeof(struct batadv_icmp_packet)))
+	if (!buf || count < sizeof(struct batadv_icmp_packet))
 		return -EINVAL;
 
 	if (!access_ok(VERIFY_WRITE, buf, count))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fb381fb26a66..033819aefc39 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -544,8 +544,8 @@ batadv_recv_handler_register(u8 packet_type,
 		    struct batadv_hard_iface *);
 	curr = batadv_rx_handler[packet_type];
 
-	if ((curr != batadv_recv_unhandled_packet) &&
-	    (curr != batadv_recv_unhandled_unicast_packet))
+	if (curr != batadv_recv_unhandled_packet &&
+	    curr != batadv_recv_unhandled_unicast_packet)
 		return -EBUSY;
 
 	batadv_rx_handler[packet_type] = recv_handler;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d327670641ac..e553a8770a89 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
 	bool orig_initialized;
 
 	if (orig_mcast_enabled && tvlv_value &&
-	    (tvlv_value_len >= sizeof(mcast_flags)))
+	    tvlv_value_len >= sizeof(mcast_flags))
 		mcast_flags = *(u8 *)tvlv_value;
 
 	spin_lock_bh(&orig->mcast_handler_lock);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8e2a4b205257..2967b86c13da 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
 			continue;
 
 		/* don't purge if the interface is not (going) down */
-		if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
-		    (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
-		    (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+		if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+		    if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+		    if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
 			continue;
 
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
 			continue;
 
 		/* don't purge if the interface is not (going) down */
-		if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
-		    (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
-		    (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+		if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+		    if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+		    if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
 			continue;
 
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 		last_seen = neigh_node->last_seen;
 		if_incoming = neigh_node->if_incoming;
 
-		if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) ||
-		    (if_incoming->if_status == BATADV_IF_INACTIVE) ||
-		    (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
-		    (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) {
-			if ((if_incoming->if_status == BATADV_IF_INACTIVE) ||
-			    (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
-			    (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED))
+		if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) ||
+		    if_incoming->if_status == BATADV_IF_INACTIVE ||
+		    if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+		    if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) {
+			if (if_incoming->if_status == BATADV_IF_INACTIVE ||
+			    if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+			    if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)
 				batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 					   "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n",
 					   orig_node->orig, neigh_node->addr,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f10e3ff26f9d..40d9bf3e5bfe 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
 	batadv_orig_ifinfo_put(orig_ifinfo);
 
 	/* route deleted */
-	if ((curr_router) && (!neigh_node)) {
+	if (curr_router && !neigh_node) {
 		batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
 			   "Deleting route towards: %pM\n", orig_node->orig);
 		batadv_tt_global_del_orig(bat_priv, orig_node, -1,
 					  "Deleted route towards originator");
 
 	/* route added */
-	} else if ((!curr_router) && (neigh_node)) {
+	} else if (!curr_router && neigh_node) {
 		batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
 			   "Adding route towards: %pM (via %pM)\n",
 			   orig_node->orig, neigh_node->addr);
@@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 	/* add record route information if not full */
 	if ((icmph->msg_type == BATADV_ECHO_REPLY ||
 	     icmph->msg_type == BATADV_ECHO_REQUEST) &&
-	    (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+	    skb->len >= sizeof(struct batadv_icmp_packet_rr)) {
 		if (skb_linearize(skb) < 0)
 			goto free_skb;
 
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 054a65e6eb68..7895323fd2a7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
 	hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
 
-	if ((hardif_neigh) && (ret != NET_XMIT_DROP))
+	if (hardif_neigh && ret != NET_XMIT_DROP)
 		hardif_neigh->bat_v.last_unicast_tx = jiffies;
 
 	if (hardif_neigh)
@@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
 		 * we delete only packets belonging to the given interface
 		 */
 		if (hard_iface &&
-		    (forw_packet->if_incoming != hard_iface) &&
-		    (forw_packet->if_outgoing != hard_iface))
+		    forw_packet->if_incoming != hard_iface &&
+		    forw_packet->if_outgoing != hard_iface)
 			continue;
 
 		hlist_del(&forw_packet->list);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index c2c986746d0b..7c8288245f80 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
 {
 	/* check ranges */
-	if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
+	if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
 		return -EINVAL;
 
 	dev->mtu = new_mtu;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0ae8b30e4eaa..aa187fd42475 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
 	if (hard_iface->if_status == status_tmp)
 		goto out;
 
-	if ((hard_iface->soft_iface) &&
-	    (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
+	if (hard_iface->soft_iface &&
+	    strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)
 		goto out;
 
 	if (status_tmp == BATADV_IF_NOT_IN_USE) {
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bfe8effe9238..4b90033f35a8 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
 
 	/* send the ack */
 	r = batadv_send_skb_to_orig(skb, orig_node, NULL);
-	if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+	if (unlikely(r < 0) || r == NET_XMIT_DROP) {
 		ret = BATADV_TP_REASON_DST_UNREACHABLE;
 		goto out;
 	}
-- 
2.11.0

^ permalink raw reply related

* [PATCH 0/4] pull request for net-next: batman-adv 2017-10-06
From: Simon Wunderlich @ 2017-10-06 13:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Simon Wunderlich

Hi David,

here is a small cleanup pull request of batman-adv to go into net-next.

Please pull or let me know of any problem!

Thank you,
      Simon

The following changes since commit 242c1a28eb61cb34974e8aa05235d84355940a8a:

  net: Remove useless function skb_header_release (2017-09-22 20:43:13 -0700)

are available in the git repository at:

  git://git.open-mesh.org/linux-merge.git tags/batadv-next-for-davem-20171006

for you to fetch changes up to 706cc9f51d9a22528af18d4b3ffbd17b30a1d3b0:

  batman-adv: Add argument names for function ptr definitions (2017-09-30 09:31:34 +0200)

----------------------------------------------------------------
This cleanup patchset includes the following patches:

 - bump version strings, by Simon Wunderlich

 - Cleanup patches to make checkpatch happy, by Sven Eckelmann (3 patches)

----------------------------------------------------------------
Simon Wunderlich (1):
      batman-adv: Start new development cycle

Sven Eckelmann (3):
      batman-adv: Remove unnecessary parentheses
      batman-adv: Fix "line over 80 characters" checkpatch warning
      batman-adv: Add argument names for function ptr definitions

 net/batman-adv/bat_iv_ogm.c            | 24 ++++++++++++------------
 net/batman-adv/bat_v.c                 |  2 +-
 net/batman-adv/bat_v_elp.c             |  6 +++---
 net/batman-adv/bat_v_ogm.c             | 12 ++++++------
 net/batman-adv/distributed-arp-table.c |  4 ++--
 net/batman-adv/gateway_client.c        |  8 ++++----
 net/batman-adv/gateway_common.c        | 18 +++++++++---------
 net/batman-adv/hard-interface.c        | 12 ++++++------
 net/batman-adv/icmp_socket.c           |  4 ++--
 net/batman-adv/main.c                  | 12 ++++++------
 net/batman-adv/main.h                  |  2 +-
 net/batman-adv/multicast.c             |  2 +-
 net/batman-adv/originator.c            | 26 +++++++++++++-------------
 net/batman-adv/routing.c               |  6 +++---
 net/batman-adv/send.c                  |  6 +++---
 net/batman-adv/soft-interface.c        |  6 +++---
 net/batman-adv/sysfs.c                 |  4 ++--
 net/batman-adv/tp_meter.c              |  2 +-
 18 files changed, 78 insertions(+), 78 deletions(-)

^ permalink raw reply

* [PATCH 3/4] batman-adv: Fix "line over 80 characters" checkpatch warning
From: Simon Wunderlich @ 2017-10-06 13:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Simon Wunderlich
In-Reply-To: <20171006135437.26736-1-sw@simonwunderlich.de>

From: Sven Eckelmann <sven@narfation.org>

Fixes: 242c1a28eb61 ("net: Remove useless function skb_header_release")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/soft-interface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7c8288245f80..3af4b0b29b23 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
 	int result;
 
 	/* TODO: We must check if we can release all references to non-payload
-	 * data using __skb_header_release in our skbs to allow skb_cow_header to
-	 * work optimally. This means that those skbs are not allowed to read
+	 * data using __skb_header_release in our skbs to allow skb_cow_header
+	 * to work optimally. This means that those skbs are not allowed to read
 	 * or write any data which is before the current position of skb->data
 	 * after that call and thus allow other skbs with the same data buffer
 	 * to write freely in that area.
-- 
2.11.0

^ permalink raw reply related

* [PATCH 4/4] batman-adv: Add argument names for function ptr definitions
From: Simon Wunderlich @ 2017-10-06 13:54 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r
In-Reply-To: <20171006135437.26736-1-sw-2YrNx6rUIHYiY0qSoAWiAoQuADTiUCJX@public.gmane.org>

From: Sven Eckelmann <sven-KaDOiPu9UxWEi8DpZVb4nw@public.gmane.org>

checkpatch started to report unnamed arguments in function pointer
definitions. Add the corresponding names to these definitions to avoid this
warning.

Signed-off-by: Sven Eckelmann <sven-KaDOiPu9UxWEi8DpZVb4nw@public.gmane.org>
Signed-off-by: Simon Wunderlich <sw-2YrNx6rUIHYiY0qSoAWiAoQuADTiUCJX@public.gmane.org>
---
 net/batman-adv/main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 033819aefc39..4daed7ad46f2 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -73,8 +73,8 @@
  * list traversals just rcu-locked
  */
 struct list_head batadv_hardif_list;
-static int (*batadv_rx_handler[256])(struct sk_buff *,
-				     struct batadv_hard_iface *);
+static int (*batadv_rx_handler[256])(struct sk_buff *skb,
+				     struct batadv_hard_iface *recv_if);
 
 unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
@@ -540,8 +540,8 @@ batadv_recv_handler_register(u8 packet_type,
 			     int (*recv_handler)(struct sk_buff *,
 						 struct batadv_hard_iface *))
 {
-	int (*curr)(struct sk_buff *,
-		    struct batadv_hard_iface *);
+	int (*curr)(struct sk_buff *skb,
+		    struct batadv_hard_iface *recv_if);
 	curr = batadv_rx_handler[packet_type];
 
 	if (curr != batadv_recv_unhandled_packet &&
-- 
2.11.0

^ permalink raw reply related

* [PATCH 1/4] batman-adv: Start new development cycle
From: Simon Wunderlich @ 2017-10-06 13:54 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r
In-Reply-To: <20171006135437.26736-1-sw-2YrNx6rUIHYiY0qSoAWiAoQuADTiUCJX@public.gmane.org>

Signed-off-by: Simon Wunderlich <sw-2YrNx6rUIHYiY0qSoAWiAoQuADTiUCJX@public.gmane.org>
---
 net/batman-adv/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 05cc7637c064..edb2f239d04d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.3"
+#define BATADV_SOURCE_VERSION "2017.4"
 #endif
 
 /* B.A.T.M.A.N. parameters */
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH 0/4] RCU: introduce noref debug
From: Paul E. McKenney @ 2017-10-06 13:34 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, David S. Miller,
	Eric Dumazet, Hannes Frederic Sowa, netdev
In-Reply-To: <cover.1507294365.git.pabeni@redhat.com>

On Fri, Oct 06, 2017 at 02:57:45PM +0200, Paolo Abeni wrote:
> The networking subsystem is currently using some kind of long-lived
> RCU-protected, references to avoid the overhead of full book-keeping.
> 
> Such references - skb_dst() noref - are stored inside the skbs and can be
> moved across relevant slices of the network stack, with the users
> being in charge of properly clearing the relevant skb - or properly refcount
> the related dst references - before the skb escapes the RCU section.
> 
> We currently don't have any deterministic debug infrastructure to check
> the dst noref usages - and the introduction of others noref artifact is
> currently under discussion.
> 
> This series tries to tackle the above introducing an RCU debug infrastructure
> aimed at spotting incorrect noref pointer usage, in patch one. The
> infrastructure is small and must be explicitly enabled via a newly introduced
> build option.
> 
> Patch two uses such infrastructure to track dst noref usage in the networking
> stack.
> 
> Patch 3 and 4 are bugfixes for small buglet found running this infrastructure
> on basic scenarios.

This patchset does not look like it handles rcu_read_lock() nesting.
For example, given code like this:

	void foo(void)
	{
		rcu_read_lock();
		rcu_track_noref(&key2, &noref2, true);
		do_something();
		rcu_track_noref(&key2, &noref2, false);
		rcu_read_unlock();
	}

	void bar(void)
	{
		rcu_read_lock();
		rcu_track_noref(&key1, &noref1, true);
		do_something_more();
		foo();
		do_something_else();
		rcu_track_noref(&key1, &noref1, false);
		rcu_read_unlock();
	}

	void grill(void)
	{
		foo();
	}

It looks like foo()'s rcu_read_unlock() will complain about key1.
You could remove foo()'s rcu_read_lock() and rcu_read_unlock(), but
that will break the call from grill().

Or am I missing something subtle here?  Given patch 3/4, I suspect not...

							Thanx, Paul

> Paolo Abeni (4):
>   rcu: introduce noref debug
>   net: use RCU noref infrastructure to track dst noref
>   ipv4: drop unneeded and misleading RCU lock in ip_route_input_noref()
>   tcp: avoid noref dst leak on input path
> 
>  include/linux/rcupdate.h | 11 ++++++
>  include/linux/skbuff.h   |  1 +
>  include/net/dst.h        |  5 +++
>  kernel/rcu/Kconfig.debug | 15 ++++++++
>  kernel/rcu/Makefile      |  1 +
>  kernel/rcu/noref_debug.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
>  net/ipv4/route.c         |  7 +---
>  net/ipv4/tcp_input.c     |  5 ++-
>  8 files changed, 127 insertions(+), 7 deletions(-)
>  create mode 100644 kernel/rcu/noref_debug.c
> 
> -- 
> 2.13.6
> 

^ permalink raw reply

* Re: [PATCH] mwifiex: Use put_unaligned_le32
From: Kalle Valo @ 2017-10-06 13:31 UTC (permalink / raw)
  To: Himanshu Jha
  Cc: Brian Norris, amitkarwar, nishants, gbhat, huxm, linux-wireless,
	netdev, linux-kernel
In-Reply-To: <20171005190730.GA8043@himanshu-Vostro-3559>

Himanshu Jha <himanshujha199640@gmail.com> writes:

> On Thu, Oct 05, 2017 at 11:02:50AM -0700, Brian Norris wrote:
>> On Thu, Oct 05, 2017 at 08:52:33PM +0530, Himanshu Jha wrote:
>> > There are various instances where a function used in file say for eg
>> > int func_align (void* a)
>> > is used and it is defined in align.h
>> > But many files don't *directly* include align.h and rather include
>> > any other header which includes align.h
>> 
>> I believe the general rule is that you should included headers for all
>> symbols you use, and not rely on implicit includes.
>> 
>> The modification to the general rule is that not all headers are
>> intended to be included directly, and in such cases there's likely a
>> parent header that is the more appropriate target.
>> 
>> In this case, the key is CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. It
>> seems that asm-generic/unaligned.h is set up to include different
>> headers, based on the expected architecture behavior.
>>
> Yes, asm-generic/unaligned.h looks more appopriate and is most generic
> implementation of unaligned accesses and  arc specific.
>
> Let's see what Kalle Valo recommends! And then I will send v2 of the
> patch.

Not sure what you are asking from me. But if you are asking should it
be:

#include <asm/unaligned.h>

or:

#include <asm-generic/unaligned.h>

I think it should be the former.

-- 
Kalle Valo

^ permalink raw reply

* Re: [PATCH net] netfilter: x_tables: avoid stack-out-of-bounds read in xt_copy_counters_from_user
From: Pablo Neira Ayuso @ 2017-10-06 13:05 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Eric Dumazet, Jozsef Kadlecsik, netfilter-devel, netdev,
	Willem de Bruijn
In-Reply-To: <20171005095644.GB27522@breakpoint.cc>

On Thu, Oct 05, 2017 at 11:56:44AM +0200, Florian Westphal wrote:
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > From: Eric Dumazet <edumazet@google.com>
> > 
> > syzkaller reports an out of bound read in strlcpy(), triggered
> > by xt_copy_counters_from_user()
> > 
> > Fix this by using memcpy(), then forcing a zero byte at the last position
> > of the destination, as Florian did for the non COMPAT code.
> > 
> > Fixes: d7591f0c41ce ("netfilter: x_tables: introduce and use xt_copy_counters_from_user")
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Cc: Willem de Bruijn <willemb@google.com>
> > ---
> >  net/netfilter/x_tables.c |    4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
> > index c83a3b5e1c6c2a91b713b6681a794bd79ab3fa08..d8571f4142080a3c121fc90f0b52d81ee9df6712 100644
> > --- a/net/netfilter/x_tables.c
> > +++ b/net/netfilter/x_tables.c
> > @@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
> >  		if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
> >  			return ERR_PTR(-EFAULT);
> >  
> > -		strlcpy(info->name, compat_tmp.name, sizeof(info->name));
> > +		memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
> 
> Argh, right, compat_tmp.name might not be 0 terminated :-/
> 
> Acked-by: Florian Westphal <fw@strlen.de>

Applied to nf, thanks.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox