Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v3 net] tcp: randomize timestamps on syncookies
From: Eric Dumazet @ 2017-05-05 13:56 UTC (permalink / raw)
  To: Florian Westphal; +Cc: David Miller, netdev, Yuchung Cheng
In-Reply-To: <1493950957.7796.36.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <edumazet@google.com>

Whole point of randomization was to hide server uptime, but an attacker
can simply start a syn flood and TCP generates 'old style' timestamps,
directly revealing server jiffies value.

Also, TSval sent by the server to a particular remote address varies
depending on syncookies being sent or not, potentially triggering PAWS
drops for innocent clients.

Let's implement proper randomization, including for SYNcookies.

Also we do not need to export sysctl_tcp_timestamps, since it is not
used from a module.

In v2, I added Florian's feedback and contribution, adding tsoff to
tcp_get_cookie_sock().

v3 removed one unused variable in tcp_v4_connect() as Florian spotted.

Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Tested-by: Florian Westphal <fw@strlen.de>
Cc: Yuchung Cheng <ycheng@google.com>
---
 include/net/secure_seq.h |   10 ++++++----
 include/net/tcp.h        |    5 +++--
 net/core/secure_seq.c    |   31 +++++++++++++++++++------------
 net/ipv4/syncookies.c    |   12 ++++++++++--
 net/ipv4/tcp_input.c     |    8 +++-----
 net/ipv4/tcp_ipv4.c      |   32 +++++++++++++++++++-------------
 net/ipv6/syncookies.c    |   10 +++++++++-
 net/ipv6/tcp_ipv6.c      |   32 +++++++++++++++++++-------------
 8 files changed, 88 insertions(+), 52 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index fe236b3429f0d8caeb1adc367b5b4a20591c848b..b94006f6fbdde0d78fe33b9c2d86159e291c30cf 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -6,10 +6,12 @@
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
-u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
-			     __be16 sport, __be16 dport, u32 *tsoff);
-u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
-			       __be16 sport, __be16 dport, u32 *tsoff);
+u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+		   __be16 sport, __be16 dport);
+u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr);
+u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+		     __be16 sport, __be16 dport);
+u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr);
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				__be16 sport, __be16 dport);
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 270e5cc43c99e7030e95af218095cf9f283950bc..8c0e5a901d6424fbd01233cd3adfdce52076f7a9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -470,7 +470,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
 /* From syncookies.c */
 struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 				 struct request_sock *req,
-				 struct dst_entry *dst);
+				 struct dst_entry *dst, u32 tsoff);
 int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
 		      u32 cookie);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
@@ -1822,7 +1822,8 @@ struct tcp_request_sock_ops {
 #endif
 	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
 				       const struct request_sock *req);
-	__u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff);
+	u32 (*init_seq)(const struct sk_buff *skb);
+	u32 (*init_ts_off)(const struct sk_buff *skb);
 	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
 			   struct flowi *fl, struct request_sock *req,
 			   struct tcp_fastopen_cookie *foc,
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6bd2f8fb0476baabf507557fc0d06b6787511c70..ae35cce3a40d70387bee815798933aa43a0e6d84 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -24,9 +24,13 @@ static siphash_key_t ts_secret __read_mostly;
 
 static __always_inline void net_secret_init(void)
 {
-	net_get_random_once(&ts_secret, sizeof(ts_secret));
 	net_get_random_once(&net_secret, sizeof(net_secret));
 }
+
+static __always_inline void ts_secret_init(void)
+{
+	net_get_random_once(&ts_secret, sizeof(ts_secret));
+}
 #endif
 
 #ifdef CONFIG_INET
@@ -47,7 +51,7 @@ static u32 seq_scale(u32 seq)
 #endif
 
 #if IS_ENABLED(CONFIG_IPV6)
-static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
 {
 	const struct {
 		struct in6_addr saddr;
@@ -60,12 +64,14 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
 	if (sysctl_tcp_timestamps != 1)
 		return 0;
 
+	ts_secret_init();
 	return siphash(&combined, offsetofend(typeof(combined), daddr),
 		       &ts_secret);
 }
+EXPORT_SYMBOL(secure_tcpv6_ts_off);
 
-u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
-			       __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
+		     __be16 sport, __be16 dport)
 {
 	const struct {
 		struct in6_addr saddr;
@@ -78,14 +84,14 @@ u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
 		.sport = sport,
 		.dport = dport
 	};
-	u64 hash;
+	u32 hash;
+
 	net_secret_init();
 	hash = siphash(&combined, offsetofend(typeof(combined), dport),
 		       &net_secret);
-	*tsoff = secure_tcpv6_ts_off(saddr, daddr);
 	return seq_scale(hash);
 }
-EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
+EXPORT_SYMBOL(secure_tcpv6_seq);
 
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport)
@@ -107,11 +113,12 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
-static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
 {
 	if (sysctl_tcp_timestamps != 1)
 		return 0;
 
+	ts_secret_init();
 	return siphash_2u32((__force u32)saddr, (__force u32)daddr,
 			    &ts_secret);
 }
@@ -121,15 +128,15 @@ static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
  * it would be easy enough to have the former function use siphash_4u32, passing
  * the arguments as separate u32.
  */
-u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
-			     __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
+		   __be16 sport, __be16 dport)
 {
-	u64 hash;
+	u32 hash;
+
 	net_secret_init();
 	hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
 			    (__force u32)sport << 16 | (__force u32)dport,
 			    &net_secret);
-	*tsoff = secure_tcp_ts_off(saddr, daddr);
 	return seq_scale(hash);
 }
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 496b97e17aaf7ed2cf41cef303cb0696927f66ac..0257d965f11119acf8c55888d6e672d171ef5f08 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -16,6 +16,7 @@
 #include <linux/siphash.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <net/secure_seq.h>
 #include <net/tcp.h>
 #include <net/route.h>
 
@@ -203,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check);
 
 struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 				 struct request_sock *req,
-				 struct dst_entry *dst)
+				 struct dst_entry *dst, u32 tsoff)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
@@ -213,6 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 						 NULL, &own_req);
 	if (child) {
 		atomic_set(&req->rsk_refcnt, 1);
+		tcp_sk(child)->tsoffset = tsoff;
 		sock_rps_save_rxhash(child, skb);
 		inet_csk_reqsk_queue_add(sk, req, child);
 	} else {
@@ -292,6 +294,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	struct rtable *rt;
 	__u8 rcv_wscale;
 	struct flowi4 fl4;
+	u32 tsoff = 0;
 
 	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -311,6 +314,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
+	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
+		tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
+		tcp_opt.rcv_tsecr -= tsoff;
+	}
+
 	if (!cookie_timestamp_decode(&tcp_opt))
 		goto out;
 
@@ -381,7 +389,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->rcv_wscale  = rcv_wscale;
 	ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
 
-	ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
+	ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff);
 	/* ip_queue_xmit() depends on our flow being setup
 	 * Normal sockets get it right from inet_csk_route_child_sock()
 	 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9739962bfb3fd2d39cb13f643def223f4f17fcb6..5a3ad09e2786fb41ad12681d09938c645b69866d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,7 +85,6 @@ int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-EXPORT_SYMBOL(sysctl_tcp_timestamps);
 
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
@@ -6347,8 +6346,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	if (isn && tmp_opt.tstamp_ok)
-		af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
+	if (tmp_opt.tstamp_ok)
+		tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb);
 
 	if (!want_cookie && !isn) {
 		/* Kill the following clause, if you dislike this way. */
@@ -6368,7 +6367,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 			goto drop_and_release;
 		}
 
-		isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
+		isn = af_ops->init_seq(skb);
 	}
 	if (!dst) {
 		dst = af_ops->route_req(sk, &fl, req);
@@ -6380,7 +6379,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	if (want_cookie) {
 		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
-		tcp_rsk(req)->ts_off = 0;
 		req->cookie_ts = tmp_opt.tstamp_ok;
 		if (!tmp_opt.tstamp_ok)
 			inet_rsk(req)->ecn_ok = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cbbafe546c0f5c5f43531eaf24f5b460264785c6..3a51582bef551f0dffd4e6b3968e23d29fcda6d1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -94,12 +94,18 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v4_init_seq(const struct sk_buff *skb)
 {
-	return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr,
-					ip_hdr(skb)->saddr,
-					tcp_hdr(skb)->dest,
-					tcp_hdr(skb)->source, tsoff);
+	return secure_tcp_seq(ip_hdr(skb)->daddr,
+			      ip_hdr(skb)->saddr,
+			      tcp_hdr(skb)->dest,
+			      tcp_hdr(skb)->source);
+}
+
+static u32 tcp_v4_init_ts_off(const struct sk_buff *skb)
+{
+	return secure_tcp_ts_off(ip_hdr(skb)->daddr,
+				 ip_hdr(skb)->saddr);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -145,7 +151,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct flowi4 *fl4;
 	struct rtable *rt;
 	int err;
-	u32 seq;
 	struct ip_options_rcu *inet_opt;
 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
@@ -232,13 +237,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rt = NULL;
 
 	if (likely(!tp->repair)) {
-		seq = secure_tcp_seq_and_tsoff(inet->inet_saddr,
-					       inet->inet_daddr,
-					       inet->inet_sport,
-					       usin->sin_port,
-					       &tp->tsoffset);
 		if (!tp->write_seq)
-			tp->write_seq = seq;
+			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
+						       inet->inet_daddr,
+						       inet->inet_sport,
+						       usin->sin_port);
+		tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr,
+						 inet->inet_daddr);
 	}
 
 	inet->inet_id = tp->write_seq ^ jiffies;
@@ -1239,7 +1244,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 	.cookie_init_seq =	cookie_v4_init_sequence,
 #endif
 	.route_req	=	tcp_v4_route_req,
-	.init_seq_tsoff	=	tcp_v4_init_seq_and_tsoff,
+	.init_seq	=	tcp_v4_init_seq,
+	.init_ts_off	=	tcp_v4_init_ts_off,
 	.send_synack	=	tcp_v4_send_synack,
 };
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 895ff650db43017ef39344679771d94ad6eaaf00..5abc3692b9011b140816dc4ce6223e79e5defddb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -18,6 +18,7 @@
 #include <linux/random.h>
 #include <linux/siphash.h>
 #include <linux/kernel.h>
+#include <net/secure_seq.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
 
@@ -143,6 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
+	u32 tsoff = 0;
 
 	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -162,6 +164,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
+	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
+		tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+					    ipv6_hdr(skb)->saddr.s6_addr32);
+		tcp_opt.rcv_tsecr -= tsoff;
+	}
+
 	if (!cookie_timestamp_decode(&tcp_opt))
 		goto out;
 
@@ -242,7 +250,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq->rcv_wscale = rcv_wscale;
 	ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
 
-	ret = tcp_get_cookie_sock(sk, skb, req, dst);
+	ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff);
 out:
 	return ret;
 out_free:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8e42e8f54b705ed8780890c7434feeff1055599a..aeb9497b5bb754f2277dec4a4dec02bf25bdbbe5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,18 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v6_init_seq(const struct sk_buff *skb)
 {
-	return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32,
-					  ipv6_hdr(skb)->saddr.s6_addr32,
-					  tcp_hdr(skb)->dest,
-					  tcp_hdr(skb)->source, tsoff);
+	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
+				ipv6_hdr(skb)->saddr.s6_addr32,
+				tcp_hdr(skb)->dest,
+				tcp_hdr(skb)->source);
+}
+
+static u32 tcp_v6_init_ts_off(const struct sk_buff *skb)
+{
+	return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
+				   ipv6_hdr(skb)->saddr.s6_addr32);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -122,7 +128,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
-	u32 seq;
 	int err;
 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
@@ -282,13 +287,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	sk_set_txhash(sk);
 
 	if (likely(!tp->repair)) {
-		seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32,
-						 sk->sk_v6_daddr.s6_addr32,
-						 inet->inet_sport,
-						 inet->inet_dport,
-						 &tp->tsoffset);
 		if (!tp->write_seq)
-			tp->write_seq = seq;
+			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
+							 sk->sk_v6_daddr.s6_addr32,
+							 inet->inet_sport,
+							 inet->inet_dport);
+		tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32,
+						   sk->sk_v6_daddr.s6_addr32);
 	}
 
 	if (tcp_fastopen_defer_connect(sk, &err))
@@ -749,7 +754,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 	.cookie_init_seq =	cookie_v6_init_sequence,
 #endif
 	.route_req	=	tcp_v6_route_req,
-	.init_seq_tsoff	=	tcp_v6_init_seq_and_tsoff,
+	.init_seq	=	tcp_v6_init_seq,
+	.init_ts_off	=	tcp_v6_init_ts_off,
 	.send_synack	=	tcp_v6_send_synack,
 };
 

^ permalink raw reply related

* Re: [PATCH 1/2] PCI: Add new PCIe Fabric End Node flag, PCI_DEV_FLAGS_NO_RELAXED_ORDERING
From: Alexander Duyck @ 2017-05-05 14:04 UTC (permalink / raw)
  To: Casey Leedom
  Cc: Raj, Ashok, Bjorn Helgaas, Michael Werner, Ganesh GR, Arjun V.,
	Asit K Mallick, Patrick J Cramer, Suravee Suthikulpanit, Bob Shaw,
	h, Ding Tianhong, Mark Rutland, Amir Ancel, Gabriele Paoloni,
	Catalin Marinas, Will Deacon, LinuxArm, David Laight,
	Jeff Kirsher, Netdev
In-Reply-To: <MWHPR12MB1600E78DD7F530AA9DC2433DC8EA0@MWHPR12MB1600.namprd12.prod.outlook.com>

On Thu, May 4, 2017 at 2:01 PM, Casey Leedom <leedom@chelsio.com> wrote:
> | From: Alexander Duyck <alexander.duyck@gmail.com>
> | Sent: Wednesday, May 3, 2017 9:02 AM
> | ...
> | It sounds like we are more or less in agreement. My only concern is
> | really what we default this to. On x86 I would say we could probably
> | default this to disabled for existing platforms since my understanding
> | is that relaxed ordering doesn't provide much benefit on what is out
> | there right now when performing DMA through the root complex. As far
> | as peer-to-peer I would say we should probably look at enabling the
> | ability to have Relaxed Ordering enabled for some channels but not
> | others. In those cases the hardware needs to be smart enough to allow
> | for you to indicate you want it disabled by default for most of your
> | DMA channels, and then enabled for the select channels that are
> | handling the peer-to-peer traffic.
>
>   Yes, I think that we are mostly in agreement.  I had just wanted to make
> sure that whatever scheme was developed would allow for simultaneously
> supporting non-Relaxed Ordering for some PCIe End Points and Relaxed
> Ordering for others within the same system.  I.e. not simply
> enabling/disabling/etc.  based solely on System Platform Architecture.
>
>   By the way, I've started our QA folks off looking at what things look like
> in Linux Virtual Machines under different Hypervisors to see what
> information they may provide to the VM in the way of what Root Complex Port
> is being used, etc.  So far they've got Windows HyperV done and there
> there's no PCIe Fabric exposed in any way: just the attached device.  I'll
> have to see what pci_find_pcie_root_port() returns in that environment.
> Maybe NULL?

I believe NULL is one of the options. It all depends on what qemu is
emulating. Most likely you won't find a pcie root port on KVM because
the default is to emulate an older system that only supports PCI.

>   With your reservations (which I also share), I think that it probably
> makes sense to have a per-architecture definition of the "Can I Use Relaxed
> Ordering With TLPs Directed At This End Point" predicate, with the default
> being "No" for any architecture which doesn't implement the predicate.  And
> if the specified (struct pci_dev *) End Node is NULL, it ought to return
> False for that as well.  I can't see any reason to pass in the Source End
> Node but I may be missing something.
>
>   At this point, this is pretty far outside my level of expertise.  I'm
> happy to give it a go, but I'd be even happier if someone with a lot more
> experience in the PCIe Infrastructure were to want to carry the ball
> forward.  I'm not super familiar with the Linux Kernel "Rules Of
> Engagement", so let me know what my next step should be.  Thanks.
>
> Casey

For now we can probably keep this on the linux-pci mailing list. Going
that route is the most straightforward for now since step one is
probably just making sure we are setting the relaxed ordering bit in
the setups that make sense. I would say we could probably keep it
simple. We just need to enable relaxed ordering by default for SPARC
architectures, on most others we can probably default it to off.

I believe this all had started as Ding Tianhong was hoping to enable
this for the ARM architecture. That is the only one I can think of
where it might be difficult to figure out which way to default as we
were attempting to follow the same code that was enabled for SPARC and
that is what started this tug-of-war about how this should be done.
What we might do is take care of this in two phases. The first one
enables the infrastructure generically but leaves it defaulted to off
for everyone but SPARC. Then we can go through and start enabling it
for other platforms such as some of those on ARM in the platforms that
Ding Tianhong was working with.

- Alex

^ permalink raw reply

* [PATCHv4] wlcore: add wl1285 compatible
From: Sebastian Reichel @ 2017-05-05 14:15 UTC (permalink / raw)
  To: David S. Miller
  Cc: Sebastian Reichel, Kalle Valo, Tony Lindgren, Marcel Holtmann,
	Rob Herring, linux-wireless, linux-omap, linux-kernel, netdev,
	Sebastian Reichel

Motorola Droid 4 uses a WL1285C. With differences between the
chips not being public let's add explicit binding for wl1285
instead of relying on wl1283 being very similar.

Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Kalle Valo <kvalo@codeaurora.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
Hi Dave,

I previously sent this in two patches, but it's hard to apply without
requiring multiple kernel releases (the driver must be updated before
the DTS change). Since the actual change is not very complex, Marcel
Holtmann & Tony Lindgren suggested that I send this directly to you
in a single patch for inclusion into 4.12. This also means that the
remaining series can be queued normally for 4.13.

Changes since PATCHv3:
 - add netdev@vger.kernel.org to cc
 - add Acked-By from Tony & Kalle
Changes since PATCHv2:
 - merge patch for DTS and driver
 - add Acked-By from Rob
Changes since PATCHv1:
 - patches did not exist in patchv1

-- Sebastian
---
 Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt | 1 +
 arch/arm/boot/dts/omap4-droid4-xt894.dts                     | 2 +-
 drivers/net/wireless/ti/wlcore/sdio.c                        | 1 +
 drivers/net/wireless/ti/wlcore/spi.c                         | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt b/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt
index 2a3d90de18ee..7b2cbb14113e 100644
--- a/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt
+++ b/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt
@@ -10,6 +10,7 @@ Required properties:
     * "ti,wl1273"
     * "ti,wl1281"
     * "ti,wl1283"
+    * "ti,wl1285"
     * "ti,wl1801"
     * "ti,wl1805"
     * "ti,wl1807"
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index 89eb607f4a9e..c09b51d6ccf7 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -301,7 +301,7 @@
 	#address-cells = <1>;
 	#size-cells = <0>;
 	wlcore: wlcore@2 {
-		compatible = "ti,wl1283";
+		compatible = "ti,wl1285";
 		reg = <2>;
 		interrupt-parent = <&gpio4>;
 		interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; /* gpio100 */
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index 287023ef4a78..2fb38717346f 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -237,6 +237,7 @@ static const struct of_device_id wlcore_sdio_of_match_table[] = {
 	{ .compatible = "ti,wl1273", .data = &wl127x_data },
 	{ .compatible = "ti,wl1281", .data = &wl128x_data },
 	{ .compatible = "ti,wl1283", .data = &wl128x_data },
+	{ .compatible = "ti,wl1285", .data = &wl128x_data },
 	{ .compatible = "ti,wl1801", .data = &wl18xx_data },
 	{ .compatible = "ti,wl1805", .data = &wl18xx_data },
 	{ .compatible = "ti,wl1807", .data = &wl18xx_data },
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index f949ad2bd898..1f5d9ebb0925 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -433,6 +433,7 @@ static const struct of_device_id wlcore_spi_of_match_table[] = {
 	{ .compatible = "ti,wl1273", .data = &wl127x_data},
 	{ .compatible = "ti,wl1281", .data = &wl128x_data},
 	{ .compatible = "ti,wl1283", .data = &wl128x_data},
+	{ .compatible = "ti,wl1285", .data = &wl128x_data},
 	{ .compatible = "ti,wl1801", .data = &wl18xx_data},
 	{ .compatible = "ti,wl1805", .data = &wl18xx_data},
 	{ .compatible = "ti,wl1807", .data = &wl18xx_data},
-- 
2.11.0

^ permalink raw reply related

* [PATCH net] bridge: netlink: account for IFLA_BRPORT_{B,M}CAST_FLOOD size and policy
From: Tobias Klauser @ 2017-05-05 14:36 UTC (permalink / raw)
  To: Stephen Hemminger, David S. Miller
  Cc: bridge, netdev, Nikolay Aleksandrov, Mike Manning

The attribute sizes for IFLA_BRPORT_MCAST_FLOOD and
IFLA_BRPORT_BCAST_FLOOD weren't accounted for in br_port_info_size()
when they were added. Do so now and also add the corresponding policy
entries:

Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Cc: Mike Manning <mmanning@brocade.com>
Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag")
Fixes: 99f906e9ad7b ("bridge: add per-port broadcast flood flag")
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 net/bridge/br_netlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a572db710d4e..c5ce7745b230 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -133,6 +133,8 @@ static inline size_t br_port_info_size(void)
 		+ nla_total_size(1)	/* IFLA_BRPORT_MCAST_TO_UCAST */
 		+ nla_total_size(1)	/* IFLA_BRPORT_LEARNING */
 		+ nla_total_size(1)	/* IFLA_BRPORT_UNICAST_FLOOD */
+		+ nla_total_size(1)	/* IFLA_BRPORT_MCAST_FLOOD */
+		+ nla_total_size(1)	/* IFLA_BRPORT_BCAST_FLOOD */
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROXYARP */
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROXYARP_WIFI */
 		+ nla_total_size(1)	/* IFLA_BRPORT_VLAN_TUNNEL */
@@ -633,6 +635,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 },
 	[IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 },
 	[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
+	[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
+	[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
 };
 
 /* Change the state of the port and notify spanning tree */
-- 
2.12.2

^ permalink raw reply related

* Re: [PATCH] xen-netfront: avoid crashing on resume after a failure in talk_to_netback()
From: Vitaly Kuznetsov @ 2017-05-05 14:40 UTC (permalink / raw)
  To: David Miller; +Cc: xen-devel, netdev, linux-kernel, boris.ostrovsky, jgross
In-Reply-To: <20170504.112150.391662736580694835.davem@davemloft.net>

David Miller <davem@davemloft.net> writes:

> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Date: Thu,  4 May 2017 14:23:04 +0200
>
>> Unavoidable crashes in netfront_resume() and netback_changed() after a
>> previous fail in talk_to_netback() (e.g. when we fail to read MAC from
>> xenstore) were discovered. The failure path in talk_to_netback() does
>> unregister/free for netdev but we don't reset drvdata and we try accessing
>> it again after resume.
>> 
>> Reset drvdata in netback_changed() the same way we reset it in
>> netfront_probe() and check for NULL in both netfront_resume() and
>> netback_changed() to properly handle the situation.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>
> The circumstances under which netfront_probe() NULLs out the device
> private is different than what you propose here, which is to do it
> on a live device in netback_changed() whilst multiple subsystems
> have a reference to this device and can call into the driver still.
>
> It is only legal to do this in the probe function because such
> references and execution possibilities do not exist at that point.
>
> What really needs to happen is that the xenbus_driver must be told to
> unregister this xen device and stop making calls into the driver for
> it before you release the netdev state.
>
> That is the only reasonable way to fix this bug.

True,

after looking at the issue again I realized that removing half of the
device in talk_to_netback() is a mistake - we should either treat errors
as fatal and remove the device completely or leave netdev in place
hoping that it'll magically get fixed later. I'm leaning towards the
former, I tried and the following simple patch does the job:

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 6ffc482..7b61adb 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1934,8 +1934,7 @@ static int talk_to_netback(struct xenbus_device *dev,
        xennet_disconnect_backend(info);
        xennet_destroy_queues(info);
  out:
-       unregister_netdev(info->netdev);
-       xennet_free_netdev(info->netdev);
+       device_unregister(&dev->dev);
        return err;
 }

In case no one is against this big hammer I can send this as v2.

Thank you for your feedback, David!

-- 
  Vitaly

^ permalink raw reply related

* Re: [PATCH 0/4] TI Bluetooth serdev support
From: Adam Ford @ 2017-05-05 14:51 UTC (permalink / raw)
  To: Sebastian Reichel
  Cc: Rob Herring, Marcel Holtmann,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA, Mark Rutland,
	devicetree-u79uwXL29TY76Z2rM5mHXA, Johan Hedberg, Gustavo Padovan,
	Satish Patel, Wei Xu, Eyal Reizer, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
In-Reply-To: <20170430160430.rmyuo6sdrkrjxjg6@earth>

On Sun, Apr 30, 2017 at 11:04 AM, Sebastian Reichel <sre-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> Hi,
>
> On Sun, Apr 30, 2017 at 10:14:20AM -0500, Adam Ford wrote:
>> On Wed, Apr 5, 2017 at 1:30 PM, Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>> > This series adds serdev support to the HCI LL protocol used on TI BT
>> > modules and enables support on HiKey board with the WL1835 module.
>> > With this the custom TI UIM daemon and btattach are no longer needed.
>>
>> Without UIM daemon, what instruction do you use to load the BT firmware?
>>
>> I was thinking 'hciattach' but I was having trouble.  I was hoping you
>> might have some insight.
>>
>>  hciattach -t 30 -s 115200 /dev/ttymxc1 texas 3000000 flow  Just
>> returns a timeout.
>>
>> I modified my i.MX6 device tree per the binding documentation and
>> setup the regulators and enable GPIO pins.
>
> If you configured everything correctly no userspace interaction is
> required. The driver should request the firmware automatically once
> you power up the bluetooth device.
>
> Apart from DT changes make sure, that the following options are
> enabled and check dmesg for any hints.
>
> CONFIG_SERIAL_DEV_BUS
> CONFIG_SERIAL_DEV_CTRL_TTYPORT
> CONFIG_BT_HCIUART
> CONFIG_BT_HCIUART_LL
>


I have enabled those flags, and I have updated my device tree.
I am testing this on an OMAP3630 (DM3730) board with a WL1283.  I am
getting a lot of timeout errors.  I tested this against the original
implementation I had in pdata-quirks.c using the ti-st driver, uim, and
the btwilink driver.

I pulled in some of the newer patches to enable the wl1283-st, but I
am obviously missing something.

[   58.717651] Bluetooth: hci0: Reading TI version information failed
(-110)
[   58.724853] Bluetooth: hci0: download firmware failed, retrying...
[   60.957641] Bluetooth: hci0 command 0x1001 tx timeout
[   68.957641] Bluetooth: hci0: Reading TI version information failed
(-110)
[   68.964843] Bluetooth: hci0: download firmware failed, retrying...
[   69.132171] Bluetooth: Unknown HCI packet type 06
[   69.138244] Bluetooth: Unknown HCI packet type 0c
[   69.143249] Bluetooth: Unknown HCI packet type 40
[   69.148498] Bluetooth: Unknown HCI packet type 20
[   69.153533] Bluetooth: Data length is too large
[   69.158569] Bluetooth: Unknown HCI packet type a0
[   69.163574] Bluetooth: Unknown HCI packet type 00
[   69.168731] Bluetooth: Unknown HCI packet type 00
[   69.173736] Bluetooth: Unknown HCI packet type 34
[   69.178924] Bluetooth: Unknown HCI packet type 91
[   71.197631] Bluetooth: hci0 command 0x1001 tx timeout
[   79.197662] Bluetooth: hci0: Reading TI version information failed (-110)

Since the pdata-quirks and original ti-st drivers work together, I
know the hardware is fine.  The only change to the device tree is the
addition of the Bluetooth container:

bluetooth {
  compatible = "ti,wl1283-st";
  enable-gpios = <&gpio6 2 GPIO_ACTIVE_HIGH>;
};

Any thoughts or suggestions to try?  I get similar behavior on an
i.MX6 board with a wl1837-st module as well.

adam
> -- Sebastian
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net] bridge: netlink: account for IFLA_BRPORT_{B,M}CAST_FLOOD size and policy
From: Nikolay Aleksandrov @ 2017-05-05 14:55 UTC (permalink / raw)
  To: Tobias Klauser, Stephen Hemminger, David S. Miller
  Cc: bridge, netdev, Mike Manning
In-Reply-To: <20170505143653.8486-1-tklauser@distanz.ch>

On 05/05/17 17:36, Tobias Klauser wrote:
> The attribute sizes for IFLA_BRPORT_MCAST_FLOOD and
> IFLA_BRPORT_BCAST_FLOOD weren't accounted for in br_port_info_size()
> when they were added. Do so now and also add the corresponding policy
> entries:
> 
> Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
> Cc: Mike Manning <mmanning@brocade.com>
> Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag")
> Fixes: 99f906e9ad7b ("bridge: add per-port broadcast flood flag")
> Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
> ---
>  net/bridge/br_netlink.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 

Oops, good catch.

Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

^ permalink raw reply

* Re: [PATCH net] bridge: netlink: account for IFLA_BRPORT_{B,M}CAST_FLOOD size and policy
From: David Miller @ 2017-05-05 15:22 UTC (permalink / raw)
  To: tklauser; +Cc: stephen, bridge, netdev, nikolay, mmanning
In-Reply-To: <20170505143653.8486-1-tklauser@distanz.ch>

From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri,  5 May 2017 16:36:53 +0200

> The attribute sizes for IFLA_BRPORT_MCAST_FLOOD and
> IFLA_BRPORT_BCAST_FLOOD weren't accounted for in br_port_info_size()
> when they were added. Do so now and also add the corresponding policy
> entries:
> 
> Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
> Cc: Mike Manning <mmanning@brocade.com>
> Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag")
> Fixes: 99f906e9ad7b ("bridge: add per-port broadcast flood flag")
> Signed-off-by: Tobias Klauser <tklauser@distanz.ch>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* admin
From: administrador @ 2017-05-05 14:13 UTC (permalink / raw)
  To: Recipients

ATENCIÓN;

Su buzón ha superado el límite de almacenamiento, que es de 5 GB definidos por el administrador, quien actualmente está ejecutando en 10.9GB, no puede ser capaz de  enviar o recibir correo nuevo hasta que vuelva a validar subuzón de correo electrónico. Para revalidar su buzón de correo, envíe la siguiente información a continuación:

nombre:
Nombre de usuario:
contraseña: 
Confirmar contraseña: 
E-mail: 
teléfono: 0

Si usted no puede revalidar su buzón, el buzón se deshabilitará!

Disculpa las molestias.
Código de verificación: es:00916gbd51.17 
Correo Soporte Técnico © 2017

¡gracias
Sistemas administrador

^ permalink raw reply

* Re: [PATCH v3 net] tcp: randomize timestamps on syncookies
From: David Miller @ 2017-05-05 16:00 UTC (permalink / raw)
  To: eric.dumazet; +Cc: fw, netdev, ycheng
In-Reply-To: <1493992614.7796.46.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 05 May 2017 06:56:54 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> Whole point of randomization was to hide server uptime, but an attacker
> can simply start a syn flood and TCP generates 'old style' timestamps,
> directly revealing server jiffies value.
> 
> Also, TSval sent by the server to a particular remote address vary
> depending on syncookies being sent or not, potentially triggering PAWS
> drops for innocent clients.
> 
> Lets implement proper randomization, including for SYNcookies.
> 
> Also we do not need to export sysctl_tcp_timestamps, since it is not
> used from a module.
> 
> In v2, I added Florian feedback and contribution, adding tsoff to
> tcp_get_cookie_sock().
> 
> v3 removed one unused variable in tcp_v4_connect() as Florian spotted.
> 
> Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reviewed-by: Florian Westphal <fw@strlen.de>
> Tested-by: Florian Westphal <fw@strlen.de>

Applied and queued up for -stable, thanks Eric.

^ permalink raw reply

* Question about packet_mmap & huge pages
From: DESBRUS Maxime @ 2017-05-05 16:00 UTC (permalink / raw)
  To: netdev@vger.kernel.org

Hello,

I am currently evaluating various 0-copy packet capture frameworks on Linux.
One of them is packet mmap, provided by the mainline Linux kernel.

I want to capture on one network interface with several raw sockets, each being used in a dedicated thread (to spread the processing load on several cores).
My first test program works well, however I am now trying to gain even more performance by mapping RX rings inside 1GB huge pages (other 0-copy network frameworks like DPDK do this).

My first question is: is it possible to map the rx ring of packet_mmap using huge pages, with the current Linux kernel?
If it is possible, what specific flags must be passed to mmap?

Here is what I tried so far:
* Calling mmap on the socket file descriptor (to set the rx ring address) with MAP_SHARED | MAP_HUGETLB | MAP_HUGE_1GB
=> mmap returns error EINVAL
* Calling mmap on the socket file descriptor (to set the rx ring address) with MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1GB
=> mmap succeeds and a huge page is allocated, but the memory area is all zero and does not contain the expected ring structures. I suspect the MAP_ANONYMOUS flag makes it ignore the socket file descriptor, and the mapping is thus unrelated to the ring.
* Calling mmap first to map a huge page area by opening a file on a hugetlbfs mount, and then calling mmap on the socket file descriptor (to set the ring address) with an address inside the previously mapped area
=> the address hint is ignored, and thus the ring is not mapped inside a huge page

I am using "tpacket_v3" packet_mmap version and my kernel version is based on 4.8 (on Ubuntu 16.04). 
I reserve huge pages with the kernel boot command line, as recommended in Documentation/vm/hugetlbpage.txt.

Thank you in advance for your guidance

^ permalink raw reply

* [PATCH] misplaced EXPORT_SYMBOL_GPL(ping_hash) in net/ipv4/ping.c
From: Vladis Dronov @ 2017-05-05 16:17 UTC (permalink / raw)
  To: netdev; +Cc: Vladis Dronov

Move misplaced EXPORT_SYMBOL_GPL(ping_hash) to a proper place.

Signed-off-by: Vladis Dronov <vdronov@redhat.com>
---

Actually, this is so small and unimportant (it just hurts my perfectionism),
so it is not worth a separate patch. Please, feel free to make it a part of
some patch of yours.

 net/ipv4/ping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index ccfbce1..19f0b7b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -71,7 +71,6 @@ static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
 	pr_debug("hash(%u) = %u\n", num, res);
 	return res;
 }
-EXPORT_SYMBOL_GPL(ping_hash);
 
 static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
 					     struct net *net, unsigned int num)
@@ -152,6 +151,7 @@ int ping_hash(struct sock *sk)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ping_hash);
 
 void ping_unhash(struct sock *sk)
 {
-- 
2.9.3

^ permalink raw reply related

* [PATCH net-next] ibmvnic: Track state of adapter napis
From: John Allen @ 2017-05-05 16:31 UTC (permalink / raw)
  To: netdev; +Cc: Nathan Fontenot, Thomas Falcon, brking, muvic

Track the state of ibmvnic napis. The driver can get into states where it
can be reset when napis are already disabled and attempting to disable them
again will cause the driver to hang.

Signed-off-by: John Allen <jallen@linux.vnet.ibm.com>
---
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4f2d329..594ee6d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -675,8 +675,12 @@ static int __ibmvnic_open(struct net_device *netdev)
 	adapter->state = VNIC_OPENING;
 	replenish_pools(adapter);

-	for (i = 0; i < adapter->req_rx_queues; i++)
-		napi_enable(&adapter->napi[i]);
+	if (adapter->napi_disabled) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			napi_enable(&adapter->napi[i]);
+
+		adapter->napi_disabled = false;
+	}

 	/* We're ready to receive frames, enable the sub-crq interrupts and
 	 * set the logical link state to up
@@ -780,9 +784,11 @@ static int __ibmvnic_close(struct net_device *netdev)
 	adapter->state = VNIC_CLOSING;
 	netif_tx_stop_all_queues(netdev);

-	if (adapter->napi) {
+	if (!adapter->napi_disabled) {
 		for (i = 0; i < adapter->req_rx_queues; i++)
 			napi_disable(&adapter->napi[i]);
+
+		adapter->napi_disabled = true;
 	}

 	clean_tx_pools(adapter);
@@ -3540,6 +3546,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		free_netdev(netdev);
 		return rc;
 	}
+
+	adapter->napi_disabled = true;
+
 	dev_info(&dev->dev, "ibmvnic registered\n");

 	adapter->state = VNIC_PROBED;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 4702b48..12b2400 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1031,4 +1031,5 @@ struct ibmvnic_adapter {
 	struct list_head rwi_list;
 	struct work_struct ibmvnic_reset;
 	bool resetting;
+	bool napi_disabled;
 };

^ permalink raw reply related

* Re: [PATCH net-next] ibmvnic: Track state of adapter napis
From: David Miller @ 2017-05-05 16:43 UTC (permalink / raw)
  To: jallen; +Cc: netdev, nfont, tlfalcon, brking, muvic
In-Reply-To: <47c6bbb6-faa0-b07b-871f-10e804b2152d@linux.vnet.ibm.com>

From: John Allen <jallen@linux.vnet.ibm.com>
Date: Fri, 5 May 2017 11:31:58 -0500

> Track the state of ibmvnic napis. The driver can get into states where it
> can be reset when napis are already disabled and attempting to disable them
> again will cause the driver to hang.
> 
> Signed-off-by: John Allen <jallen@linux.vnet.ibm.com>

The net-next tree is closed, resubmit this when the net-next tree opens
back up.

Thanks.

^ permalink raw reply

* Re: [PATCH] misplaced EXPORT_SYMBOL_GPL(ping_hash) in net/ipv4/ping.c
From: David Miller @ 2017-05-05 16:44 UTC (permalink / raw)
  To: vdronov; +Cc: netdev
In-Reply-To: <20170505161716.6752-1-vdronov@redhat.com>

From: Vladis Dronov <vdronov@redhat.com>
Date: Fri,  5 May 2017 18:17:16 +0200

> Move misplaced EXPORT_SYMBOL_GPL(ping_hash) to a proper place.
> 
> Signed-off-by: Vladis Dronov <vdronov@redhat.com>

Please use a proper subject line with an appropriate subsystem
prefix.

Also you must indicate the intended target tree (net or net-next)
in the [] brackets.  For example:

	[PATCH net] ipv4: Fix misplaced EXPORT_SYMBOL_GPL ...

^ permalink raw reply

* Re: [PATCH iproute2] vxlan: Add support for modifying vxlan device attributes
From: Stephen Hemminger @ 2017-05-05 16:47 UTC (permalink / raw)
  To: Girish Moodalbail; +Cc: netdev
In-Reply-To: <c5c7969d-9808-13a9-316b-afe590997127@oracle.com>

On Thu, 4 May 2017 17:26:23 -0700
Girish Moodalbail <girish.moodalbail@oracle.com> wrote:

> On 5/4/17 5:07 PM, Stephen Hemminger wrote:
> > On Thu,  4 May 2017 14:46:34 -0700
> > Girish Moodalbail <girish.moodalbail@oracle.com> wrote:
> >  
> >> Ability to change vxlan device attributes was added to kernel through
> >> commit 8bcdc4f3a20b ("vxlan: add changelink support"), however one
> >> cannot do the same through ip(8) command.  Changing the allowed vxlan
> >> device attributes using 'ip link set dev <vxlan_name> type vxlan
> >> <allowed_attributes>' currently fails with 'operation not supported'
> >> error.  This failure is due to the incorrect rtnetlink message
> >> construction for the 'ip link set' operation.
> >>
> >> The vxlan_parse_opt() callback function is called for parsing options
> >> for both 'ip link add' and 'ip link set'. For the 'add' case, we pass
> >> down default values for those attributes that were not provided as CLI
> >> options. However, for the 'set' case we should be only passing down the
> >> explicitly provided attributes and not any other (default) attributes.
> >>
> >> Signed-off-by: Girish Moodalbail <girish.moodalbail@oracle.com>
> >> ---  
> >
> > All these foo_set variables are ugly. This looks almost like machine
> > generated code. It doesn't read well.  
> 
> I thought about it, however I wasn't sure if refactoring that whole routine will 
> be well received so I decided to follow the current model that already existed 
> in iplink_vxlan.c. I will re-submit a patch cleaning up that whole routine.
> 
> thanks,
> ~Girish
> 

Thanks. This is one of those cases where something new gets added and additional
refactoring (that was overdue) is needed.

^ permalink raw reply

* Re: [PATCH iproute] tc: Reflect HW offload status
From: Stephen Hemminger @ 2017-05-05 16:49 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: netdev, Roi Dayan, Paul Blakey
In-Reply-To: <1493903715-29664-1-git-send-email-ogerlitz@mellanox.com>

On Thu,  4 May 2017 16:15:15 +0300
Or Gerlitz <ogerlitz@mellanox.com> wrote:

> Currently there is no way of querying whether a filter is
> offloaded to HW or not when using "both" policy (where none
> of skip_sw or skip_hw flags are set by user-space).
> 
> Add two new flags, "in hw" and "not in hw" such that user
> space can determine if a filter is actually offloaded to
> hw or not. The "in hw" UAPI semantics was chosen so it's
> similar to the "skip hw" flag logic.
> 
> If none of these two flags are set, this signals running
> over older kernel.
> 
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> Reviewed-by: Jiri Pirko <jiri@mellanox.com>

Applied thanks

^ permalink raw reply

* Re: [patch iproute2 v2 0/2] devlink: Add support for pipeline
From: Stephen Hemminger @ 2017-05-05 16:50 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, arkadis, davem, mlxsw, dsa
In-Reply-To: <1493810723-2536-1-git-send-email-jiri@resnulli.us>

On Wed,  3 May 2017 13:25:21 +0200
Jiri Pirko <jiri@resnulli.us> wrote:

> From: Jiri Pirko <jiri@mellanox.com>
> 
> Arkadi says:
> 
> Add support for pipeline debug (dpipe). As a preparation step the netlink
> attribute validation was changed before adding new dpipe attributes.
> ---
> v1->v2
> - Change netlink attribute validation. 
> - Fix commit message typos
> 
> Arkadi Sharshevsky (2):
>   devlink: Change netlink attribute validation
>   devlink: Add support for pipeline debug (dpipe)
> 
>  devlink/devlink.c | 1450 ++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 1281 insertions(+), 169 deletions(-)
> 

Applied thanks.

^ permalink raw reply

* Re: [PATCH] net: ipv6: Fix warning of freeing alive inet6 address
From: Mike Manning @ 2017-05-05 16:51 UTC (permalink / raw)
  To: Cong Wang; +Cc: Linux Kernel Network Developers, Andrey Konovalov, David Miller
In-Reply-To: <f52bbfcd-e961-190a-6aa5-3f43d833f27c@brocade.com>

On 03/05/17 19:24, Mike Manning wrote:
> On 03/05/17 18:58, Cong Wang wrote:
>> On Tue, May 2, 2017 at 11:30 AM, Mike Manning <mmanning@brocade.com> wrote:
>>> While this is not reproducible manually, Andrey's syzkaller program hit
>>> the warning "IPv6: Freeing alive inet6 address" with this part trace:
>>>
>>> inet6_ifa_finish_destroy+0x12e/0x190 c:894
>>> in6_ifa_put ./include/net/addrconf.h:330
>>> addrconf_dad_work+0x4e9/0x1040 net/ipv6/addrconf.c:3963
>>>
>>> The fix is to call in6_ifa_put() for the inet6_ifaddr before rather
>>> than after calling addrconf_ifdown(), as the latter may remove it from
>>> the address hash table.
>>>
>>> Fixes: 85b51b12115c ("net: ipv6: Remove addresses for failures with strict DAD")
>>> Reported-by: Andrey Konovalov <andreyknvl@google.com>
>>> Signed-off-by: Mike Manning <mmanning@brocade.com>
>>> ---
>>>  net/ipv6/addrconf.c | 6 +++++-
>>>  1 file changed, 5 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>>> index 80ce478..361993a 100644
>>> --- a/net/ipv6/addrconf.c
>>> +++ b/net/ipv6/addrconf.c
>>> @@ -3902,8 +3902,11 @@ static void addrconf_dad_work(struct work_struct *w)
>>>         } else if (action == DAD_ABORT) {
>>>                 in6_ifa_hold(ifp);
>>>                 addrconf_dad_stop(ifp, 1);
>>> -               if (disable_ipv6)
>>> +               if (disable_ipv6) {
>>> +                       in6_ifa_put(ifp);
>>>                         addrconf_ifdown(idev->dev, 0);
>>> +                       goto unlock;
>>> +               }
>>
>>
>> But addrconf_dad_stop() calls ipv6_del_addr() which could unhash
>> the addr too...
>>

Further investigation shows that none of the code block above is at fault. Debugging
shows that the problem is happening with DAD_BEGIN and not DAD_ABORT. Follows more
detail on the issue, but as I do not have a fix at this stage, I retract this
submission altogether.

The problem is due to rapidly adding the same address fd00::bb on ip6tnl0, and also
without running DAD (accept_dad < 1), so it's an edge case. Typically the call to
addrconf_dad_work() starts with an ifp refcnt of 3. Then via addrconf_dad_begin()
and addrconf_dad_completed(), the call to addrconf_del_dad_work() results in a dec
of the refcnt to 2 due to the call to cancel_delayed_work() returning 1.

The 2nd normal case is if the call to addrconf_dad_work() starts with an ifp refcnt of
2, in which case the call to cancel_delayed_work() returns 0 and so no decrement
of the refcnt, which correctly stays at 2.

The error case is when the call to addrconf_dad_work() starts with an ifp refcnt of
2, but the call to cancel_delayed_work() then also results in a dec of the refcnt to 1,
so the final in6_ifa_put() detects that the refcnt is being reduced to 0 for an active
address.

So the question is whether the interaction of cancel_delayed_work() in 
addrconf_dad_work(), delayed_work_pending() in addrconf_mod_dad_work() and
INIT_DELAYED_WORK in ipv6_add_addr() [along with the handling for this when deleting
addresses] needs improving, and if so how?

^ permalink raw reply

* Re: net/smc and the RDMA core
From: Ursula Braun @ 2017-05-05 17:06 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: hch-jcswGhMUV9g@public.gmane.org, Sagi Grimberg, Bart Van Assche,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20170504153155.GB854-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>



On 05/04/2017 05:31 PM, Jason Gunthorpe wrote:
> On Thu, May 04, 2017 at 03:08:39PM +0200, Ursula Braun wrote:
>>
>>
>> On 05/04/2017 10:48 AM, hch-jcswGhMUV9g@public.gmane.org wrote:
>>> On Thu, May 04, 2017 at 11:43:50AM +0300, Sagi Grimberg wrote:
>>>> I would also suggest that you stop exposing the DMA MR for remote
>>>> access (at least by default) and use a proper reg_mr operations with a
>>>> limited lifetime on a properly sized buffer.
>>>
>>> Yes, exposing the default DMA MR is a _major_ security risk.  As soon
>>> as SMC is enabled this will mean a remote system has full read/write
>>> access to the local systems memory.
>>>
>>> There's a reason why I removed the ib_get_dma_mr function and replaced
>>> it with the IB_PD_UNSAFE_GLOBAL_RKEY key that has _UNSAFE_ in the name
>>> and a very long comment explaining why, and I'm really disappointed that
>>> we got a driver merged that instead of asking on the relevant list on
>>> why a change unexporting a function it needed happened and instead
>>> tried the hard way to keep a security vulnerability alive.
>>>
>> Thanks for pointing out these problems. We will address them.
> 
> So, you've created a huge security hole in the kernel, anyone who
> loads your smc module is vulnerable.
> 
> What are you going to do *RIGHT NOW* to mitigate this?
> 
> Jason

We do not see that just loading the smc module causes this issue. The security
risk starts with the first connection, that actually uses smc. This is only
possible if an AF_SMC socket connection is created while the so-called
pnet-table is available and offers a mapping between the used Ethernet
interface and RoCE device. Such a mapping has to be configured by a user
(via a netlink interface) and, thus, is a conscious decision by that user.

Nevertheless, thanks for all the valuable feedback; we take this security risk
seriously and addressing it is obviously at the top of our list. We're working
on this issue right now, and will post patches as soon as possible. 
 
Ursula

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: net/smc and the RDMA core
From: Jason Gunthorpe @ 2017-05-05 17:10 UTC (permalink / raw)
  To: Ursula Braun
  Cc: hch@lst.de, Sagi Grimberg, Bart Van Assche, davem@davemloft.net,
	netdev@vger.kernel.org, linux-rdma@vger.kernel.org
In-Reply-To: <750b09b5-f898-fe7f-1e82-1f6c06cc0f58@linux.vnet.ibm.com>

On Fri, May 05, 2017 at 07:06:56PM +0200, Ursula Braun wrote:

> We do not see that just loading the smc module causes this issue. The security
> risk starts with the first connection, that actually uses smc. This is only
> possible if an AF_SMC socket connection is created while the so-called
> pnet-table is available and offers a mapping between the used Ethernet
> interface and RoCE device. Such a mapping has to be configured by a user
> (via a netlink interface) and, thus, is a conscious decision by that user.

At a minimum this escalates any local root exploit to a full kernel
exploit in the presence of RDMA hardware, so I do not think you should
be so dismissive of the impact.

I recommend immediately sending a kconfig patch cc'd to stable making
SMC require CONFIG_BROKEN so that nobody inadvertently turns it on.

Jason

^ permalink raw reply

* admin
From: administrador @ 2017-05-05 13:34 UTC (permalink / raw)
  To: Recipients

ATENCIÓN;

Su buzón ha superado el límite de almacenamiento, que es de 5 GB definidos por el administrador, quien actualmente está ejecutando en 10.9GB, no puede ser capaz de  enviar o recibir correo nuevo hasta que vuelva a validar subuzón de correo electrónico. Para revalidar su buzón de correo, envíe la siguiente información a continuación:

nombre:
Nombre de usuario:
contraseña: 
Confirmar contraseña: 
E-mail: 
teléfono: 0

Si usted no puede revalidar su buzón, el buzón se deshabilitará!

Disculpa las molestias.
Código de verificación: es:00916gbd51.17 
Correo Soporte Técnico © 2017

¡gracias
Sistemas administrador

^ permalink raw reply

* Re: [PATCH v4 binutils] Add BPF support to binutils...
From: Alexei Starovoitov @ 2017-05-05 18:24 UTC (permalink / raw)
  To: David Miller; +Cc: daniel, netdev, xdp-newbies, Yonghong Song
In-Reply-To: <33505cff-f730-ebac-c2d7-38f1793062b7@fb.com>

On 5/1/17 8:49 PM, Alexei Starovoitov wrote:
> On 4/30/17 9:07 AM, David Miller wrote:
>> This is mainly a synchronization point, I still need to look
>> more deeply into Alexei's -g issue.
>>
>> New in this version from v3:
>>  - Remove tailcall from opcode table
>>  - Rearrange relocations so that numbers match with LLVM ones
>>  - Emit relocs properly so that dwarf2 debug info tests pass
>>  - Handle negative load/store offsets properly, add tests
>>
>> Signed-off-by: David S. Miller <davem@davemloft.net>
>
> dwarf on little endian works now :)

Yonghong fixed llvm bug with big-endian dwarf [1]
and binutils worked out of the box :)

$ ./bin/clang -O2 -target bpfeb -c -g test.c
$ /w/binutils-gdb/bld/binutils/objdump -S test.o

test.o:     file format elf64-bpfbe

Disassembly of section .text:
0000000000000000 <bpf_prog1>:
int bpf_prog1(void *ign)
{
   volatile unsigned long t = 0x8983984739ull;
    0:	18 10 00 00 83 98 47 39 	ldimm64	r1, 590618314553
    8:	00 00 00 00 00 00 00 89
   10:	7b a1 ff f8 00 00 00 00 	stdw	[r10+-8], r1
   return *(unsigned long *)((0xffffffff8fff0002ull) + t);
   18:	79 1a ff f8 00 00 00 00 	lddw	r1, [r10+-8]

[1]
https://reviews.llvm.org/rL302265

^ permalink raw reply

* Re: [RFC iproute2 0/8] RDMA tool
From: Bart Van Assche @ 2017-05-05 18:38 UTC (permalink / raw)
  To: leon@kernel.org
  Cc: jiri@mellanox.com, linux-rdma@vger.kernel.org,
	ram.amrani@cavium.com, sagi@grimberg.me, ogerlitz@mellanox.com,
	hch@lst.de, dennis.dalessandro@intel.com, netdev@vger.kernel.org,
	jgunthorpe@obsidianresearch.com, stephen@networkplumber.org,
	dledford@redhat.com, ariela@mellanox.com
In-Reply-To: <20170504184531.GE22833@mtr-leonro.local>

On Thu, 2017-05-04 at 21:45 +0300, Leon Romanovsky wrote:
> It is not hard to create new tool, the hardest part is to ensure that it is
> part of the distributions. Did you count how many months we are trying to
> add rdma-core to debian?

Hello Leon,

Sorry but I was not aware that the effort to add rdma-core to Debian was taking
that long. Please let me know if I can help with that effort.

Bart.

^ permalink raw reply

* Re: [PATCH] net: alx: handle pci_alloc_irq_vectors return correctly
From: David Miller @ 2017-05-05 18:52 UTC (permalink / raw)
  To: rakesh
  Cc: jcliburn, chris.snook, tobias.regnery, feng.tang, edumazet,
	netdev, linux-kernel, hch
In-Reply-To: <20170505112823.GA4019@hercules.tuxera.com>

From: Rakesh Pandit <rakesh@tuxera.com>
Date: Fri, 5 May 2017 14:28:23 +0300

> It was introduced while switching to pci_alloc_irq_vectors recently
> and fixes:
 ...
> Fixes: f3297f68 ("net: alx: switch to pci_alloc_irq_vectors")
> Signed-off-by: Rakesh Pandit <rakesh@tuxera.com>

Applied, thanks.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox