Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 net-next 1/2] netfilter: check if the socket netns is correct.
From: Flavio Leitner @ 2018-06-27 13:34 UTC (permalink / raw)
  To: netdev
  Cc: Eric Dumazet, Paolo Abeni, David Miller, Florian Westphal,
	netfilter-devel, Flavio Leitner
In-Reply-To: <20180627133426.3858-1-fbl@redhat.com>

Netfilter assumes that if the socket is present in the skb, then
it can be used because that reference is cleaned up while the skb
is crossing netns.

We want to change that to preserve the socket reference in a future
patch, so this is a preparation updating netfilter to check if the
socket netns matches before use it.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
---
 include/net/netfilter/nf_log.h         | 3 ++-
 net/ipv4/netfilter/nf_log_ipv4.c       | 8 ++++----
 net/ipv6/netfilter/nf_log_ipv6.c       | 8 ++++----
 net/netfilter/nf_conntrack_broadcast.c | 2 +-
 net/netfilter/nf_log_common.c          | 5 +++--
 net/netfilter/nf_nat_core.c            | 6 +++++-
 net/netfilter/nft_meta.c               | 9 ++++++---
 net/netfilter/nft_socket.c             | 5 ++++-
 net/netfilter/xt_cgroup.c              | 6 ++++--
 net/netfilter/xt_owner.c               | 2 +-
 net/netfilter/xt_recent.c              | 3 ++-
 net/netfilter/xt_socket.c              | 8 ++++++++
 12 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index e811ac07ea94..0d3920896d50 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -106,7 +106,8 @@ int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 			   u8 proto, int fragment, unsigned int offset,
 			   unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk);
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk);
 void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 			       unsigned int hooknum, const struct sk_buff *skb,
 			       const struct net_device *in,
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 4388de0e5380..1e6f28c97d3a 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -35,7 +35,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct nf_log_buf *m,
+static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int iphoff)
 {
@@ -183,7 +183,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
 				nf_log_buf_add(m, "[");
-				dump_ipv4_packet(m, info, skb,
+				dump_ipv4_packet(net, m, info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
 				nf_log_buf_add(m, "] ");
 			}
@@ -251,7 +251,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && !iphoff)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (!iphoff && skb->mark)
@@ -333,7 +333,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv4_mac_header(m, loginfo, skb);
 
-	dump_ipv4_packet(m, loginfo, skb, 0);
+	dump_ipv4_packet(net, m, loginfo, skb, 0);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index b397a8fe88b9..c6bf580d0f33 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -36,7 +36,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct nf_log_buf *m,
+static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int ip6hoff,
 			     int recurse)
@@ -258,7 +258,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (recurse) {
 				nf_log_buf_add(m, "[");
-				dump_ipv6_packet(m, info, skb,
+				dump_ipv6_packet(net, m, info, skb,
 						 ptr + sizeof(_icmp6h), 0);
 				nf_log_buf_add(m, "] ");
 			}
@@ -278,7 +278,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && recurse)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (recurse && skb->mark)
@@ -365,7 +365,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv6_mac_header(m, loginfo, skb);
 
-	dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
+	dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index a1086bdec242..5423b197d98a 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -32,7 +32,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 	__be32 mask = 0;
 
 	/* we're only interested in locally generated packets */
-	if (skb->sk == NULL)
+	if (skb->sk == NULL || !net_eq(nf_ct_net(ct), sock_net(skb->sk)))
 		goto out;
 	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
 		goto out;
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index dc61399e30be..a8c5c846aec1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -132,9 +132,10 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
 
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk)
 {
-	if (!sk || !sk_fullsock(sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
 		return;
 
 	read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 46f9df99d276..86df2a1666fd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -108,6 +108,7 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 	struct flowi fl;
 	unsigned int hh_len;
 	struct dst_entry *dst;
+	struct sock *sk = skb->sk;
 	int err;
 
 	err = xfrm_decode_session(skb, &fl, family);
@@ -119,7 +120,10 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
+	if (sk && !net_eq(net, sock_net(sk)))
+		sk = NULL;
+
+	dst = xfrm_lookup(net, dst, &fl, sk, 0);
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 1105a23bda5e..2b94dcc43456 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -107,7 +107,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKUID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -123,7 +124,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKGID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -214,7 +216,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 #ifdef CONFIG_CGROUP_NET_CLASSID
 	case NFT_META_CGROUP:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 		*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
 		break;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 74e1b3bd6954..998c2b546f6d 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -23,6 +23,9 @@ static void nft_socket_eval(const struct nft_expr *expr,
 	struct sock *sk = skb->sk;
 	u32 *dest = &regs->data[priv->dreg];
 
+	if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		switch(nft_pf(pkt)) {
 		case NFPROTO_IPV4:
@@ -39,7 +42,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 			return;
 		}
 
-	if(!sk) {
+	if (!sk) {
 		nft_reg_store8(dest, 0);
 		return;
 	}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7df2dece57d3..5d92e1781980 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -72,8 +72,9 @@ static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_cgroup_info_v0 *info = par->matchinfo;
+	struct sock *sk = skb->sk;
 
-	if (skb->sk == NULL || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
@@ -85,8 +86,9 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_cgroup_info_v1 *info = par->matchinfo;
 	struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
 	struct cgroup *ancestor = info->priv;
+	struct sock *sk = skb->sk;
 
-	if (!skb->sk || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	if (ancestor)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 3d705c688a27..46686fb73784 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -67,7 +67,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sock *sk = skb_to_full_sk(skb);
 	struct net *net = xt_net(par);
 
-	if (sk == NULL || sk->sk_socket == NULL)
+	if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk)))
 		return (info->match ^ info->invert) == 0;
 	else if (info->match & info->invert & XT_OWNER_SOCKET)
 		/*
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 07085c22b19c..f44de4bc2100 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -265,7 +265,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	/* use TTL as seen before forwarding */
-	if (xt_out(par) != NULL && skb->sk == NULL)
+	if (xt_out(par) != NULL &&
+	    (!skb->sk || !net_eq(net, sock_net(skb->sk))))
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5c0779c4fa3c..0472f3472842 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -56,8 +56,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
+	if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
@@ -113,8 +117,12 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
+	if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
-- 
2.14.3

^ permalink raw reply related

* [PATCH v2 net-next 2/2] skbuff: preserve sock reference when scrubbing the skb.
From: Flavio Leitner @ 2018-06-27 13:34 UTC (permalink / raw)
  To: netdev
  Cc: Eric Dumazet, Paolo Abeni, David Miller, Florian Westphal,
	netfilter-devel, Flavio Leitner
In-Reply-To: <20180627133426.3858-1-fbl@redhat.com>

The sock reference is lost when scrubbing the packet and that breaks
TSQ (TCP Small Queues) and XPS (Transmit Packet Steering) causing
performance impacts of about 50% in a single TCP stream when crossing
network namespaces.

XPS breaks because the queue mapping stored in the socket is not
available, so another random queue might be selected when the stack
needs to transmit something like a TCP ACK, or TCP Retransmissions.
That causes packet re-ordering and/or performance issues.

TSQ breaks because it orphans the packet while it is still in the
host, so packets are queued contributing to the buffer bloat problem.

Preserving the sock reference fixes both issues. The socket is
orphaned anyways in the receiving path before any relevant action
and on TX side the netfilter checks if the reference is local before
use it.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
---
 Documentation/networking/ip-sysctl.txt | 10 +++++-----
 net/core/skbuff.c                      |  1 -
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ce8fbf5aa63c..f4c042be0216 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -733,11 +733,11 @@ tcp_limit_output_bytes - INTEGER
 	Controls TCP Small Queue limit per tcp socket.
 	TCP bulk sender tends to increase packets in flight until it
 	gets losses notifications. With SNDBUF autotuning, this can
-	result in a large amount of packets queued in qdisc/device
-	on the local machine, hurting latency of other flows, for
-	typical pfifo_fast qdiscs.
-	tcp_limit_output_bytes limits the number of bytes on qdisc
-	or device to reduce artificial RTT/cwnd and reduce bufferbloat.
+	result in a large amount of packets queued on the local machine
+	(e.g.: qdiscs, CPU backlog, or device) hurting latency of other
+	flows, for typical pfifo_fast qdiscs.  tcp_limit_output_bytes
+	limits the number of bytes on qdisc or device to reduce artificial
+	RTT/cwnd and reduce bufferbloat.
 	Default: 262144
 
 tcp_challenge_ack_limit - INTEGER
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b1f274f22d85..f59e98ca72c5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4911,7 +4911,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 		return;
 
 	ipvs_reset(skb);
-	skb_orphan(skb);
 	skb->mark = 0;
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
-- 
2.14.3

^ permalink raw reply related

* Re: [PATCH net-next] mlx4: do not use rwlock in fast path
From: Eric Dumazet @ 2018-06-27 13:49 UTC (permalink / raw)
  To: Tariq Toukan, Eric Dumazet, David Miller
  Cc: netdev, Shawn Bohrer, Shay Agroskin, Eran Ben Elisha
In-Reply-To: <05ae066b-873d-159b-4ac2-ab39120c949b@mellanox.com>



On 06/27/2018 05:11 AM, Tariq Toukan wrote:
> 
> 
> On 09/02/2017 7:10 PM, Eric Dumazet wrote:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> Using a reader-writer lock in fast path is silly, when we can
>> instead use RCU or a seqlock.
>>
>> For mlx4 hwstamp clock, a seqlock is the way to go, removing
>> two atomic operations and false sharing.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Cc: Tariq Toukan <tariqt@mellanox.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx4/en_clock.c |   35 ++++++++--------
>>   drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |    2
>>   2 files changed, 19 insertions(+), 18 deletions(-)
>>
> 
> Hi Eric,
> 
> When my peer, Shay, modified mlx5 to adopt this same locking scheme/type, he noticed a degradation in packet rate.
> He got back to testing mlx4 and also noticed a degradation introduced by this patch.
> 
> Perf numbers (single ring):
> 
> mlx4:
> with rw-lock: ~8.54M pps
> with seq-lock: ~8.51M pps
> 
> mlx5:
> With rw-lock: ~14.94M pps
> With seq-lock: ~14.48M pps
> 
> Actually, this can be explained by the analysis below.
> In short, number of readers is significantly larger than of writers. Hence optimizing the readers flow would give better numbers. The issue is, the read/write lock might cause writers starvation. Maybe RCU fits best here?
> 
> Degradation analysis:
> The patch changes the lock type which protects reads and updates of a variable ( (struct mlx4_en_dev).clock variable)
> This variable is used to convert the hw timestamp into skb->hwtstamps.
> This variable is read for each transmitted/received packet and updated only via ptp module and some overflow periodic work we have (maximum of 10 times per second)
> Meaning that there are much more readers than writers, and it’s best to optimize the readers flow.
>

Hi Tariq

Are you sure you enabled time stamps in your tests ?

mlx4_en_fill_hwtstamps() is _really_ called 8,540,000 times per second,
meaning a same amount of read_lock_irqsave()/read_unlock_irqrestore() is performed ?

You have a pretty damn good CPU it seems.

seqlock has no cost for a reader [1], other than reading one integer value and testing it.
[1] If this value never change (and is on a clean cache line).

Really this looks like ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL in your tests.

The numbers you gave just give one cycle difference per packet (half a nano second),
so I really doubt adding back the heavy  read_lock_irqsave()/read_unlock_irqrestore()
could be faster.

Conceptually seqlock is some form of RCU, it really optimizes the readers flow.

Thanks

^ permalink raw reply

* Re: [PATCH v2 net-next 2/2] skbuff: preserve sock reference when scrubbing the skb.
From: Eric Dumazet @ 2018-06-27 13:56 UTC (permalink / raw)
  To: Flavio Leitner, netdev
  Cc: Eric Dumazet, Paolo Abeni, David Miller, Florian Westphal,
	netfilter-devel
In-Reply-To: <20180627133426.3858-3-fbl@redhat.com>



On 06/27/2018 06:34 AM, Flavio Leitner wrote:
> The sock reference is lost when scrubbing the packet and that breaks
> TSQ (TCP Small Queues) and XPS (Transmit Packet Steering) causing
> performance impacts of about 50% in a single TCP stream when crossing
> network namespaces.
> 
> XPS breaks because the queue mapping stored in the socket is not
> available, so another random queue might be selected when the stack
> needs to transmit something like a TCP ACK, or TCP Retransmissions.
> That causes packet re-ordering and/or performance issues.

Note we do not care if another random queue is selected when TCP retransmit
after timeout happens.

The problem is really when sending a normal train of packets (being retransmission
or not). We want all of them going through one queue to avoid reorders.

After an idle period (no packets are in any qdisc/NIC queue), we absolutely
are okay to select another "random queue".

This choice is driven by skb->ooo_okay

Most TCP ACK packets are sent while no prior packet is in qdisc, so should
have ooo_okay set to 1.

> 
> TSQ breaks because it orphans the packet while it is still in the
> host, so packets are queued contributing to the buffer bloat problem.
> 
> Preserving the sock reference fixes both issues. The socket is
> orphaned anyways in the receiving path before any relevant action
> and on TX side the netfilter checks if the reference is local before
> use it.
> 
> Signed-off-by: Flavio Leitner <fbl@redhat.com>
> ---
>  Documentation/networking/ip-sysctl.txt | 10 +++++-----
>  net/core/skbuff.c                      |  1 -
>  2 files changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
> index ce8fbf5aa63c..f4c042be0216 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -733,11 +733,11 @@ tcp_limit_output_bytes - INTEGER
>  	Controls TCP Small Queue limit per tcp socket.
>  	TCP bulk sender tends to increase packets in flight until it
>  	gets losses notifications. With SNDBUF autotuning, this can
> -	result in a large amount of packets queued in qdisc/device
> -	on the local machine, hurting latency of other flows, for
> -	typical pfifo_fast qdiscs.
> -	tcp_limit_output_bytes limits the number of bytes on qdisc
> -	or device to reduce artificial RTT/cwnd and reduce bufferbloat.
> +	result in a large amount of packets queued on the local machine
> +	(e.g.: qdiscs, CPU backlog, or device) hurting latency of other
> +	flows, for typical pfifo_fast qdiscs.  tcp_limit_output_bytes
> +	limits the number of bytes on qdisc or device to reduce artificial
> +	RTT/cwnd and reduce bufferbloat.
>  	Default: 262144
>  
>  tcp_challenge_ack_limit - INTEGER
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index b1f274f22d85..f59e98ca72c5 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4911,7 +4911,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
>  		return;
>  
>  	ipvs_reset(skb);
> -	skb_orphan(skb);
>  	skb->mark = 0;
>  }
>  EXPORT_SYMBOL_GPL(skb_scrub_packet);
> 

^ permalink raw reply

* [PATCH bpf 2/4] xsk: frame could be completed more than once in SKB path
From: Magnus Karlsson @ 2018-06-27 14:02 UTC (permalink / raw)
  To: magnus.karlsson, bjorn.topel, ast, daniel, netdev; +Cc: qi.z.zhang, pavel
In-Reply-To: <1530108136-4984-1-git-send-email-magnus.karlsson@intel.com>

Fixed a bug in which a frame could be completed more than once
when an error was returned from dev_direct_xmit(). The code
erroneously retried sending the message leading to multiple
calls to the SKB destructor and therefore multiple completions
of the same buffer to user space.

The error code in this case has been changed from EAGAIN to EBUSY
in order to tell user space that the sending of the packet failed
and the buffer has been return to user space through the completion
queue.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Reported-by: Pavel Odintsov <pavel@fastnetmon.com>
---
 net/xdp/xsk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3b3410ada097..d482f727f4c2 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -268,15 +268,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 		skb->destructor = xsk_destruct_skb;
 
 		err = dev_direct_xmit(skb, xs->queue_id);
+		xskq_discard_desc(xs->tx);
 		/* Ignore NET_XMIT_CN as packet might have been sent */
 		if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
-			err = -EAGAIN;
-			/* SKB consumed by dev_direct_xmit() */
+			/* SKB completed but not sent */
+			err = -EBUSY;
 			goto out;
 		}
 
 		sent_frame = true;
-		xskq_discard_desc(xs->tx);
 	}
 
 out:
-- 
2.7.4

^ permalink raw reply related

* [PATCH bpf 3/4] samples/bpf: deal with EBUSY return code from sendmsg in xdpsock sample
From: Magnus Karlsson @ 2018-06-27 14:02 UTC (permalink / raw)
  To: magnus.karlsson, bjorn.topel, ast, daniel, netdev; +Cc: qi.z.zhang, pavel
In-Reply-To: <1530108136-4984-1-git-send-email-magnus.karlsson@intel.com>

Sendmsg in the SKB path of AF_XDP can now return EBUSY when a packet
was discarded and completed by the driver. Just ignore this message
in the sample application.

Fixes: b4b8faa1ded7 ("samples/bpf: sample application and documentation for AF_XDP sockets")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Reported-by: Pavel Odintsov <pavel@fastnetmon.com>
---
 samples/bpf/xdpsock_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index d69c8d78d3fd..aec3a61fac44 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -729,7 +729,7 @@ static void kick_tx(int fd)
 	int ret;
 
 	ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN)
+	if (ret >= 0 || errno == EAGAIN || errno == EBUSY)
 		return;
 	lassert(0);
 }
-- 
2.7.4

^ permalink raw reply related

* [PATCH bpf 0/4] Bug fixes to the SKB TX path of AF_XDP
From: Magnus Karlsson @ 2018-06-27 14:02 UTC (permalink / raw)
  To: magnus.karlsson, bjorn.topel, ast, daniel, netdev; +Cc: qi.z.zhang, pavel

This patch set fixes three bugs in the SKB TX path of AF_XDP.
Details in the individual commits.

The structure of the patch set is as follows:

Patch 1: Fix for lost completion message
Patch 2-3: Fix for possible multiple completions of single packet
Patch 4: Fix potential race during error

/Magnus

Magnus Karlsson (4):
  xsk: fix potential lost completion message in SKB path
  xsk: frame could be completed more than once in SKB path
  samples/bpf: deal with EBUSY return code from sendmsg in xdpsock
    sample
  xsk: fix potential race in SKB TX completion code

 include/net/xdp_sock.h     |  4 ++++
 net/xdp/xsk.c              | 10 +++++++---
 net/xdp/xsk_queue.h        |  9 ++-------
 samples/bpf/xdpsock_user.c |  2 +-
 4 files changed, 14 insertions(+), 11 deletions(-)

^ permalink raw reply

* [PATCH bpf 1/4] xsk: fix potential lost completion message in SKB path
From: Magnus Karlsson @ 2018-06-27 14:02 UTC (permalink / raw)
  To: magnus.karlsson, bjorn.topel, ast, daniel, netdev; +Cc: qi.z.zhang, pavel
In-Reply-To: <1530108136-4984-1-git-send-email-magnus.karlsson@intel.com>

The code in xskq_produce_addr erroneously checked if there
was up to LAZY_UPDATE_THRESHOLD amount of space in the completion
queue. It only needs to check if there is one slot left in the
queue. This bug could under some circumstances lead to a WARN_ON_ONCE
being triggered and the completion message to user space being lost.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Reported-by: Pavel Odintsov <pavel@fastnetmon.com>
---
 net/xdp/xsk_queue.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index ef6a6f0ec949..52ecaf770642 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
 	return (entries > dcnt) ? dcnt : entries;
 }
 
-static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
-{
-	return q->nentries - (producer - q->cons_tail);
-}
-
 static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
 {
-	u32 free_entries = xskq_nb_free_lazy(q, producer);
+	u32 free_entries = q->nentries - (producer - q->cons_tail);
 
 	if (free_entries >= dcnt)
 		return free_entries;
@@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 {
 	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 
-	if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+	if (xskq_nb_free(q, q->prod_tail, 1) == 0)
 		return -ENOSPC;
 
 	ring->desc[q->prod_tail++ & q->ring_mask] = addr;
-- 
2.7.4

^ permalink raw reply related

* [PATCH bpf 4/4] xsk: fix potential race in SKB TX completion code
From: Magnus Karlsson @ 2018-06-27 14:02 UTC (permalink / raw)
  To: magnus.karlsson, bjorn.topel, ast, daniel, netdev; +Cc: qi.z.zhang, pavel
In-Reply-To: <1530108136-4984-1-git-send-email-magnus.karlsson@intel.com>

There was a potential race in the TX completion code for
the SKB case when the TX napi thread and the error path
of the sendmsg code could both call the SKB destructor
at the same time. Fixed by introducing a spin_lock in the
destructor.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
---
 include/net/xdp_sock.h | 4 ++++
 net/xdp/xsk.c          | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 9fe472f2ac95..7161856bcf9c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -60,6 +60,10 @@ struct xdp_sock {
 	bool zc;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
+	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
+	 * in the SKB destructor callback.
+	 */
+	spinlock_t tx_completion_lock;
 	u64 rx_dropped;
 };
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d482f727f4c2..650c4da8dc5a 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 {
 	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
 	struct xdp_sock *xs = xdp_sk(skb->sk);
+	unsigned long flags;
 
+	spin_lock_irqsave(&xs->tx_completion_lock, flags);
 	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+	spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
 
 	sock_wfree(skb);
 }
@@ -754,6 +757,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 
 	xs = xdp_sk(sk);
 	mutex_init(&xs->mutex);
+	spin_lock_init(&xs->tx_completion_lock);
 
 	local_bh_disable();
 	sock_prot_inuse_add(net, &xsk_proto, 1);
-- 
2.7.4

^ permalink raw reply related

* [PATCH net] net: stmmac: Set DMA buffer size in HW
From: Jose Abreu @ 2018-06-27 14:03 UTC (permalink / raw)
  To: netdev
  Cc: Jose Abreu, David S. Miller, Joao Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue

This is clearly a bug.

We need to set the DMA buffer size in the HW otherwise corruption can
occur when receiving packets.

This is probably not occuring because of small MTU values and because HW
has a default value internally (which currently is bigger than default
buffer size).

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Vitor Soares <soares@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c  | 12 ++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h  |  2 ++
 drivers/net/ethernet/stmicro/stmmac/hwif.h        |  3 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  2 ++
 4 files changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index d37f17ca62fe..65bc3556bd8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -407,6 +407,16 @@ static void dwmac4_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
 	}
 }
 
+static void dwmac4_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
+{
+	u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+
+	value &= ~DMA_RBSZ_MASK;
+	value |= (bfsize << DMA_RBSZ_SHIFT) & DMA_RBSZ_MASK;
+
+	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+}
+
 const struct stmmac_dma_ops dwmac4_dma_ops = {
 	.reset = dwmac4_dma_reset,
 	.init = dwmac4_dma_init,
@@ -431,6 +441,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = {
 	.set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
 	.set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
 	.enable_tso = dwmac4_enable_tso,
+	.set_bfsize = dwmac4_set_bfsize,
 };
 
 const struct stmmac_dma_ops dwmac410_dma_ops = {
@@ -457,4 +468,5 @@ const struct stmmac_dma_ops dwmac410_dma_ops = {
 	.set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
 	.set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
 	.enable_tso = dwmac4_enable_tso,
+	.set_bfsize = dwmac4_set_bfsize,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index c63c1fe3f26b..22a4a6dbb1a4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -120,6 +120,8 @@
 
 /* DMA Rx Channel X Control register defines */
 #define DMA_CONTROL_SR			BIT(0)
+#define DMA_RBSZ_MASK			GENMASK(14, 1)
+#define DMA_RBSZ_SHIFT			1
 
 /* Interrupt status per channel */
 #define DMA_CHAN_STATUS_REB		GENMASK(21, 19)
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index e44e7b26ce82..fe8b536b13f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -183,6 +183,7 @@ struct stmmac_dma_ops {
 	void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 	void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 	void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
+	void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
 };
 
 #define stmmac_reset(__priv, __args...) \
@@ -235,6 +236,8 @@ struct stmmac_dma_ops {
 	stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
 #define stmmac_enable_tso(__priv, __args...) \
 	stmmac_do_void_callback(__priv, dma, enable_tso, __args)
+#define stmmac_set_dma_bfsize(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
 
 struct mac_device_info;
 struct net_device;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cba46b62a1cd..60f59abab009 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1804,6 +1804,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 
 		stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
 				rxfifosz, qmode);
+		stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz,
+				chan);
 	}
 
 	for (chan = 0; chan < tx_channels_count; chan++) {
-- 
2.7.4

^ permalink raw reply related

* Re: [RFC PATCH v2 net-next 01/12] net: core: trivial netif_receive_skb_list() entry point
From: Edward Cree @ 2018-06-27 14:03 UTC (permalink / raw)
  To: Eric Dumazet, linux-net-drivers, netdev; +Cc: davem
In-Reply-To: <3fbb875b-c737-1b23-d56f-816e667e1e7f@gmail.com>

On 27/06/18 01:06, Eric Dumazet wrote:
> On 06/26/2018 11:17 AM, Edward Cree wrote:
>> Just calls netif_receive_skb() in a loop.
> ...
>
>> +void netif_receive_skb_list(struct sk_buff_head *list)
>
> Please use a standard list_head and standard list operators.
>
> (In all your patches)
>
> 1) We do not want to carry a spinlock_t + count per list...
>
> 2) We get nice debugging features with CONFIG_DEBUG_LIST=y
>
> Note that we now have skb->list after 
> commit d4546c2509b1e9cd082e3682dcec98472e37ee5a ("net: Convert GRO SKB handling to list_head.")
So we do.  That hadn't gone in yet on Monday when I last pulled net-next; I'll make sure to use it in the next spin.
Thanks.

^ permalink raw reply

* Re: [PATCH net] net: stmmac: Set DMA buffer size in HW
From: Jose Abreu @ 2018-06-27 14:11 UTC (permalink / raw)
  To: netdev, David S. Miller
  Cc: Joao Pinto, Vitor Soares, Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <2447baa54f044c9c8a1674970061ac4ac9a25e49.1530108148.git.joabreu@synopsys.com>

David,

On 27-06-2018 15:03, Jose Abreu wrote:
> This is clearly a bug.

You will probably have an hard time backporting this because of
the way the callbacks are handled now.

I can send you a patch based on some -stable branch if you prefer.

Thanks and Best Regards,
Jose Miguel Abreu

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for CBS QDISC
From: kbuild test robot @ 2018-06-27 14:15 UTC (permalink / raw)
  To: Jose Abreu
  Cc: kbuild-all, netdev, Jose Abreu, David S. Miller, Joao Pinto,
	Vitor Soares, Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <daa3124702201f0d1c30da51f2a90cf432e39ba3.1530103308.git.joabreu@synopsys.com>

[-- Attachment #1: Type: text/plain, Size: 4313 bytes --]

Hi Jose,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Jose-Abreu/net-stmmac-Add-support-for-CBS-QDISC/20180627-214704
config: sh-allyesconfig (attached as .config)
compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=sh 

All warnings (new ones prefixed by >>):

   In file included from ./arch/sh/include/generated/asm/div64.h:1:0,
                    from include/linux/kernel.h:174,
                    from include/linux/list.h:9,
                    from include/linux/timer.h:5,
                    from include/linux/workqueue.h:9,
                    from include/net/pkt_cls.h:6,
                    from drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:7:
   drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_setup_cbs':
   include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
     (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
                               ^
>> drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:325:2: note: in expansion of macro 'do_div'
     do_div(value, speed_div);
     ^~~~~~
   include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
     (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
                               ^
   drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:329:2: note: in expansion of macro 'do_div'
     do_div(value, speed_div);
     ^~~~~~

vim +/do_div +325 drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c

   291	
   292	static int tc_setup_cbs(struct stmmac_priv *priv,
   293				struct tc_cbs_qopt_offload *qopt)
   294	{
   295		u32 tx_queues_count = priv->plat->tx_queues_to_use;
   296		u32 queue = qopt->queue;
   297		u32 ptr, speed_div;
   298		u32 mode_to_use;
   299		s64 value;
   300		int ret;
   301	
   302		/* Queue 0 is not AVB capable */
   303		if (queue <= 0 || queue >= tx_queues_count)
   304			return -EINVAL;
   305		if (priv->speed != SPEED_100 && priv->speed != SPEED_1000)
   306			return -EOPNOTSUPP;
   307	
   308		mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
   309		if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) {
   310			ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_AVB);
   311			if (ret)
   312				return ret;
   313	
   314			priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
   315		} else if (!qopt->enable) {
   316			return stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_DCB);
   317		}
   318	
   319		/* Port Transmit Rate and Speed Divider */
   320		ptr = (priv->speed == SPEED_100) ? 4 : 8;
   321		speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000;
   322	
   323		/* Final adjustments for HW */
   324		value = qopt->idleslope * 1024 * ptr;
 > 325		do_div(value, speed_div);
   326		priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0);
   327	
   328		value = -qopt->sendslope * 1024UL * ptr;
   329		do_div(value, speed_div);
   330		priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0);
   331	
   332		value = qopt->hicredit * 1024 * 8;
   333		priv->plat->tx_queues_cfg[queue].high_credit = value & GENMASK(31, 0);
   334	
   335		value = qopt->locredit * 1024 * 8;
   336		priv->plat->tx_queues_cfg[queue].low_credit = value & GENMASK(31, 0);
   337	
   338		ret = stmmac_config_cbs(priv, priv->hw,
   339					priv->plat->tx_queues_cfg[queue].send_slope,
   340					priv->plat->tx_queues_cfg[queue].idle_slope,
   341					priv->plat->tx_queues_cfg[queue].high_credit,
   342					priv->plat->tx_queues_cfg[queue].low_credit,
   343					queue);
   344		if (ret)
   345			return ret;
   346	
   347		dev_info(priv->device, "CBS queue %d: send %d, idle %d, hi %d, lo %d\n",
   348				queue, qopt->sendslope, qopt->idleslope,
   349				qopt->hicredit, qopt->locredit);
   350		return 0;
   351	}
   352	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 48915 bytes --]

^ permalink raw reply

* Re: [RFC bpf-next 2/6] net: xdp: RX meta data infrastructure
From: Jesper Dangaard Brouer @ 2018-06-27 14:15 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: Alexei Starovoitov, Daniel Borkmann, neerav.parikh, pjwaskiewicz,
	ttoukan.linux, Tariq Toukan, alexander.h.duyck,
	peter.waskiewicz.jr, Opher Reviv, Rony Efraim, netdev,
	Saeed Mahameed, brouer
In-Reply-To: <20180627024615.17856-3-saeedm@mellanox.com>

On Tue, 26 Jun 2018 19:46:11 -0700
Saeed Mahameed <saeedm@dev.mellanox.co.il> wrote:

> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 2deea7166a34..afe302613ae1 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -138,6 +138,12 @@ xdp_set_data_meta_invalid(struct xdp_buff *xdp)
>  	xdp->data_meta = xdp->data + 1;
>  }
>  
> +static __always_inline void
> +xdp_reset_data_meta(struct xdp_buff *xdp)
> +{
> +	xdp->data_meta = xdp->data_hard_start;
> +}

This is WRONG ... it should be:

 xdp->data_meta = xdp->data;

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH v2 net-next 1/2] netfilter: check if the socket netns is correct.
From: Florian Westphal @ 2018-06-27 14:22 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: netdev, Eric Dumazet, Paolo Abeni, David Miller, Florian Westphal,
	netfilter-devel
In-Reply-To: <20180627133426.3858-2-fbl@redhat.com>

Flavio Leitner <fbl@redhat.com> wrote:
> Netfilter assumes that if the socket is present in the skb, then
> it can be used because that reference is cleaned up while the skb
> is crossing netns.
>
> We want to change that to preserve the socket reference in a future
> patch, so this is a preparation updating netfilter to check if the
> socket netns matches before use it.

Acked-by: Florian Westphal <fw@strlen.de>

^ permalink raw reply

* Re: [PATCH net-next v2] net: vhost: improve performance when enable busyloop
From: Jason Wang @ 2018-06-27 14:24 UTC (permalink / raw)
  To: xiangxia.m.yue; +Cc: virtualization, netdev, Tonghao Zhang, Michael S. Tsirkin
In-Reply-To: <1529990276-61157-1-git-send-email-xiangxia.m.yue@gmail.com>



On 2018年06月26日 13:17, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> This patch improves the guest receive performance from
> host. On the handle_tx side, we poll the sock receive
> queue at the same time. handle_rx do that in the same way.
>
> For avoiding deadlock, change the code to lock the vq one
> by one and use the VHOST_NET_VQ_XX as a subclass for
> mutex_lock_nested. With the patch, qemu can set differently
> the busyloop_timeout for rx or tx queue.
>
> We set the poll-us=100us and use the iperf3 to test
> its throughput. The iperf3 command is shown as below.
>
> on the guest:
> iperf3  -s -D
>
> on the host:
> iperf3  -c 192.168.1.100 -i 1 -P 10 -t 10 -M 1400
>
> * With the patch:     23.1 Gbits/sec
> * Without the patch:  12.7 Gbits/sec
>
> Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>

Thanks a lot for the patch. Looks good generally, but please split this 
big patch into separate ones like:

patch 1: lock vqs one by one
patch 2: replace magic number of lock annotation
patch 3: factor out generic busy polling logic to vhost_net_busy_poll()
patch 4: add rx busy polling in tx path.

And please cc Michael in v3.

Thanks

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for CBS QDISC
From: Jose Abreu @ 2018-06-27 14:32 UTC (permalink / raw)
  To: kbuild test robot, Jose Abreu, linux-sh, Rich Felker,
	Yoshinori Sato
  Cc: kbuild-all, netdev, David S. Miller, Joao Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <201806272221.VPR2vJhr%fengguang.wu@intel.com>

++ SH Maintainers
++ SH ML

Hi SH Maintainers,

On 27-06-2018 15:15, kbuild test robot wrote:
> Hi Jose,
>
> I love your patch! Perhaps something to improve:
>
> [auto build test WARNING on net-next/master]
>
> url:    https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_0day-2Dci_linux_commits_Jose-2DAbreu_net-2Dstmmac-2DAdd-2Dsupport-2Dfor-2DCBS-2DQDISC_20180627-2D214704&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=qW5_vbinGt0OnyDfQ2wtKdb2ZGzCcLwq6Fmlaki61xw&e=
> config: sh-allyesconfig (attached as .config)
> compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> reproduce:
>         wget https://urldefense.proofpoint.com/v2/url?u=https-3A__raw.githubusercontent.com_intel_lkp-2Dtests_master_sbin_make.cross&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=4nNar4fgVZq0LjrPKIZP30nxVUY4yeu5QeyKbmlsu8A&e= -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         GCC_VERSION=7.2.0 make.cross ARCH=sh 
>
> All warnings (new ones prefixed by >>):
>
>    In file included from ./arch/sh/include/generated/asm/div64.h:1:0,
>                     from include/linux/kernel.h:174,
>                     from include/linux/list.h:9,
>                     from include/linux/timer.h:5,
>                     from include/linux/workqueue.h:9,
>                     from include/net/pkt_cls.h:6,
>                     from drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:7:
>    drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_setup_cbs':
>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
>                                ^
>>> drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:325:2: note: in expansion of macro 'do_div'
>      do_div(value, speed_div);
>      ^~~~~~
>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \

I'm not seeing the reason for this warning as I'm using a 64 bit
var. I guess the warning is appearing only because its signed
[see source bellow]. Is this not supported?

Thanks and Best Regards,
Jose Miguel Abreu

>                                ^
>    drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:329:2: note: in expansion of macro 'do_div'
>      do_div(value, speed_div);
>      ^~~~~~
>
> vim +/do_div +325 drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c
>
>    291	
>    292	static int tc_setup_cbs(struct stmmac_priv *priv,
>    293				struct tc_cbs_qopt_offload *qopt)
>    294	{
>    295		u32 tx_queues_count = priv->plat->tx_queues_to_use;
>    296		u32 queue = qopt->queue;
>    297		u32 ptr, speed_div;
>    298		u32 mode_to_use;
>    299		s64 value;
>    300		int ret;
>    301	
>    302		/* Queue 0 is not AVB capable */
>    303		if (queue <= 0 || queue >= tx_queues_count)
>    304			return -EINVAL;
>    305		if (priv->speed != SPEED_100 && priv->speed != SPEED_1000)
>    306			return -EOPNOTSUPP;
>    307	
>    308		mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
>    309		if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) {
>    310			ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_AVB);
>    311			if (ret)
>    312				return ret;
>    313	
>    314			priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
>    315		} else if (!qopt->enable) {
>    316			return stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_DCB);
>    317		}
>    318	
>    319		/* Port Transmit Rate and Speed Divider */
>    320		ptr = (priv->speed == SPEED_100) ? 4 : 8;
>    321		speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000;
>    322	
>    323		/* Final adjustments for HW */
>    324		value = qopt->idleslope * 1024 * ptr;
>  > 325		do_div(value, speed_div);
>    326		priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0);
>    327	
>    328		value = -qopt->sendslope * 1024UL * ptr;
>    329		do_div(value, speed_div);
>    330		priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0);
>    331	
>    332		value = qopt->hicredit * 1024 * 8;
>    333		priv->plat->tx_queues_cfg[queue].high_credit = value & GENMASK(31, 0);
>    334	
>    335		value = qopt->locredit * 1024 * 8;
>    336		priv->plat->tx_queues_cfg[queue].low_credit = value & GENMASK(31, 0);
>    337	
>    338		ret = stmmac_config_cbs(priv, priv->hw,
>    339					priv->plat->tx_queues_cfg[queue].send_slope,
>    340					priv->plat->tx_queues_cfg[queue].idle_slope,
>    341					priv->plat->tx_queues_cfg[queue].high_credit,
>    342					priv->plat->tx_queues_cfg[queue].low_credit,
>    343					queue);
>    344		if (ret)
>    345			return ret;
>    346	
>    347		dev_info(priv->device, "CBS queue %d: send %d, idle %d, hi %d, lo %d\n",
>    348				queue, qopt->sendslope, qopt->idleslope,
>    349				qopt->hicredit, qopt->locredit);
>    350		return 0;
>    351	}
>    352	
>
> ---
> 0-DAY kernel test infrastructure                Open Source Technology Center
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.01.org_pipermail_kbuild-2Dall&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=yMjbhyNoi6ZESIohHtaqFOTXipZVefU7mA4Tfc5QPms&e=                   Intel Corporation

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for CBS QDISC
From: Jose Abreu @ 2018-06-27 14:36 UTC (permalink / raw)
  To: kbuild test robot, Jose Abreu, linux-sh, Rich Felker,
	Yoshinori Sato, linux-kernel@vger.kernel.org
  Cc: kbuild-all, netdev, David S. Miller, Joao Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <3780919b-c9ce-2ab6-be3a-376752c608ac@synopsys.com>

++ LKML because I just noticed this is using generic headers.

On 27-06-2018 15:32, Jose Abreu wrote:
> ++ SH Maintainers
> ++ SH ML
>
> Hi SH Maintainers,
>
> On 27-06-2018 15:15, kbuild test robot wrote:
>> Hi Jose,
>>
>> I love your patch! Perhaps something to improve:
>>
>> [auto build test WARNING on net-next/master]
>>
>> url:    https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_0day-2Dci_linux_commits_Jose-2DAbreu_net-2Dstmmac-2DAdd-2Dsupport-2Dfor-2DCBS-2DQDISC_20180627-2D214704&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=qW5_vbinGt0OnyDfQ2wtKdb2ZGzCcLwq6Fmlaki61xw&e=
>> config: sh-allyesconfig (attached as .config)
>> compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
>> reproduce:
>>         wget https://urldefense.proofpoint.com/v2/url?u=https-3A__raw.githubusercontent.com_intel_lkp-2Dtests_master_sbin_make.cross&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=4nNar4fgVZq0LjrPKIZP30nxVUY4yeu5QeyKbmlsu8A&e= -O ~/bin/make.cross
>>         chmod +x ~/bin/make.cross
>>         # save the attached .config to linux build tree
>>         GCC_VERSION=7.2.0 make.cross ARCH=sh 
>>
>> All warnings (new ones prefixed by >>):
>>
>>    In file included from ./arch/sh/include/generated/asm/div64.h:1:0,
>>                     from include/linux/kernel.h:174,
>>                     from include/linux/list.h:9,
>>                     from include/linux/timer.h:5,
>>                     from include/linux/workqueue.h:9,
>>                     from include/net/pkt_cls.h:6,
>>                     from drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:7:
>>    drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_setup_cbs':
>>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
>>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
>>                                ^
>>>> drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:325:2: note: in expansion of macro 'do_div'
>>      do_div(value, speed_div);
>>      ^~~~~~
>>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
>>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> I'm not seeing the reason for this warning as I'm using a 64 bit
> var. I guess the warning is appearing only because its signed
> [see source bellow]. Is this not supported?
>
> Thanks and Best Regards,
> Jose Miguel Abreu
>
>>                                ^
>>    drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:329:2: note: in expansion of macro 'do_div'
>>      do_div(value, speed_div);
>>      ^~~~~~
>>
>> vim +/do_div +325 drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c
>>
>>    291	
>>    292	static int tc_setup_cbs(struct stmmac_priv *priv,
>>    293				struct tc_cbs_qopt_offload *qopt)
>>    294	{
>>    295		u32 tx_queues_count = priv->plat->tx_queues_to_use;
>>    296		u32 queue = qopt->queue;
>>    297		u32 ptr, speed_div;
>>    298		u32 mode_to_use;
>>    299		s64 value;
>>    300		int ret;
>>    301	
>>    302		/* Queue 0 is not AVB capable */
>>    303		if (queue <= 0 || queue >= tx_queues_count)
>>    304			return -EINVAL;
>>    305		if (priv->speed != SPEED_100 && priv->speed != SPEED_1000)
>>    306			return -EOPNOTSUPP;
>>    307	
>>    308		mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
>>    309		if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) {
>>    310			ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_AVB);
>>    311			if (ret)
>>    312				return ret;
>>    313	
>>    314			priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
>>    315		} else if (!qopt->enable) {
>>    316			return stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_DCB);
>>    317		}
>>    318	
>>    319		/* Port Transmit Rate and Speed Divider */
>>    320		ptr = (priv->speed == SPEED_100) ? 4 : 8;
>>    321		speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000;
>>    322	
>>    323		/* Final adjustments for HW */
>>    324		value = qopt->idleslope * 1024 * ptr;
>>  > 325		do_div(value, speed_div);
>>    326		priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0);
>>    327	
>>    328		value = -qopt->sendslope * 1024UL * ptr;
>>    329		do_div(value, speed_div);
>>    330		priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0);
>>    331	
>>    332		value = qopt->hicredit * 1024 * 8;
>>    333		priv->plat->tx_queues_cfg[queue].high_credit = value & GENMASK(31, 0);
>>    334	
>>    335		value = qopt->locredit * 1024 * 8;
>>    336		priv->plat->tx_queues_cfg[queue].low_credit = value & GENMASK(31, 0);
>>    337	
>>    338		ret = stmmac_config_cbs(priv, priv->hw,
>>    339					priv->plat->tx_queues_cfg[queue].send_slope,
>>    340					priv->plat->tx_queues_cfg[queue].idle_slope,
>>    341					priv->plat->tx_queues_cfg[queue].high_credit,
>>    342					priv->plat->tx_queues_cfg[queue].low_credit,
>>    343					queue);
>>    344		if (ret)
>>    345			return ret;
>>    346	
>>    347		dev_info(priv->device, "CBS queue %d: send %d, idle %d, hi %d, lo %d\n",
>>    348				queue, qopt->sendslope, qopt->idleslope,
>>    349				qopt->hicredit, qopt->locredit);
>>    350		return 0;
>>    351	}
>>    352	
>>
>> ---
>> 0-DAY kernel test infrastructure                Open Source Technology Center
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.01.org_pipermail_kbuild-2Dall&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=yMjbhyNoi6ZESIohHtaqFOTXipZVefU7mA4Tfc5QPms&e=                   Intel Corporation

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for CBS QDISC
From: Geert Uytterhoeven @ 2018-06-27 14:36 UTC (permalink / raw)
  To: Jose.Abreu
  Cc: kbuild test robot, Linux-sh list, Rich Felker, Yoshinori Sato,
	kbuild-all, netdev, David S. Miller, Joao.Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <3780919b-c9ce-2ab6-be3a-376752c608ac@synopsys.com>

Hi Jose,

On Wed, Jun 27, 2018 at 4:32 PM Jose Abreu <Jose.Abreu@synopsys.com> wrote:
> ++ SH Maintainers
> ++ SH ML
>
> Hi SH Maintainers,
>
> On 27-06-2018 15:15, kbuild test robot wrote:
> > Hi Jose,
> >
> > I love your patch! Perhaps something to improve:
> >
> > [auto build test WARNING on net-next/master]
> >
> > url:    https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_0day-2Dci_linux_commits_Jose-2DAbreu_net-2Dstmmac-2DAdd-2Dsupport-2Dfor-2DCBS-2DQDISC_20180627-2D214704&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=qW5_vbinGt0OnyDfQ2wtKdb2ZGzCcLwq6Fmlaki61xw&e=
> > config: sh-allyesconfig (attached as .config)
> > compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> > reproduce:
> >         wget https://urldefense.proofpoint.com/v2/url?u=https-3A__raw.githubusercontent.com_intel_lkp-2Dtests_master_sbin_make.cross&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=4nNar4fgVZq0LjrPKIZP30nxVUY4yeu5QeyKbmlsu8A&e= -O ~/bin/make.cross
> >         chmod +x ~/bin/make.cross
> >         # save the attached .config to linux build tree
> >         GCC_VERSION=7.2.0 make.cross ARCH=sh
> >
> > All warnings (new ones prefixed by >>):
> >
> >    In file included from ./arch/sh/include/generated/asm/div64.h:1:0,
> >                     from include/linux/kernel.h:174,
> >                     from include/linux/list.h:9,
> >                     from include/linux/timer.h:5,
> >                     from include/linux/workqueue.h:9,
> >                     from include/net/pkt_cls.h:6,
> >                     from drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:7:
> >    drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_setup_cbs':
> >    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
> >      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> >                                ^
> >>> drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:325:2: note: in expansion of macro 'do_div'
> >      do_div(value, speed_div);
> >      ^~~~~~
> >    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
> >      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
>
> I'm not seeing the reason for this warning as I'm using a 64 bit
> var. I guess the warning is appearing only because its signed
> [see source bellow]. Is this not supported?

No, the first parameter of do_div() must be u64, hence the check to
enforce that.

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [RFC PATCH v2 net-next 06/12] net: core: propagate SKB lists through packet_type lookup
From: Willem de Bruijn @ 2018-06-27 14:36 UTC (permalink / raw)
  To: ecree; +Cc: linux-net-drivers, Network Development, David Miller
In-Reply-To: <a665e6bc-176c-6c5a-282b-a3ed52e0a13f@solarflare.com>

On Tue, Jun 26, 2018 at 8:19 PM Edward Cree <ecree@solarflare.com> wrote:
>
> __netif_receive_skb_taps() does a depressingly large amount of per-packet
>  work that can't easily be listified, because the another_round looping
>  makes it nontrivial to slice up into smaller functions.
> Fortunately, most of that work disappears in the fast path:
>  * Hardware devices generally don't have an rx_handler
>  * Unless you're tcpdumping or something, there is usually only one ptype
>  * VLAN processing comes before the protocol ptype lookup, so doesn't force
>    a pt_prev deliver
>  so normally, __netif_receive_skb_taps() will run straight through and return
>  the one ptype found in ptype_base[hash of skb->protocol].
>
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---

> -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
> +static int __netif_receive_skb_taps(struct sk_buff *skb, bool pfmemalloc,
> +                                   struct packet_type **pt_prev)

A lot of code churn can be avoided by keeping local variable pt_prev and
calling this ppt_prev or so, then assigning just before returning on success.

Also, this function does more than just process network taps.

>  {
> -       struct packet_type *ptype, *pt_prev;
>         rx_handler_func_t *rx_handler;
>         struct net_device *orig_dev;
>         bool deliver_exact = false;
> +       struct packet_type *ptype;
>         int ret = NET_RX_DROP;
>         __be16 type;
>
> @@ -4514,7 +4515,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
>                 skb_reset_transport_header(skb);
>         skb_reset_mac_len(skb);
>
> -       pt_prev = NULL;
> +       *pt_prev = NULL;
>
>  another_round:
>         skb->skb_iif = skb->dev->ifindex;
> @@ -4535,25 +4536,25 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
>                 goto skip_taps;
>
>         list_for_each_entry_rcu(ptype, &ptype_all, list) {
> -               if (pt_prev)
> -                       ret = deliver_skb(skb, pt_prev, orig_dev);
> -               pt_prev = ptype;
> +               if (*pt_prev)
> +                       ret = deliver_skb(skb, *pt_prev, orig_dev);
> +               *pt_prev = ptype;
>         }

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for CBS QDISC
From: Jose Abreu @ 2018-06-27 14:44 UTC (permalink / raw)
  To: Geert Uytterhoeven, Jose.Abreu
  Cc: kbuild test robot, Linux-sh list, Rich Felker, Yoshinori Sato,
	kbuild-all, netdev, David S. Miller, Joao.Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <CAMuHMdUhQBpfZGk-18F8BxPqRzHX4GEEV-=4LZ9NeO=pm6bsyg@mail.gmail.com>

Hi Geert,

On 27-06-2018 15:36, Geert Uytterhoeven wrote:
> Hi Jose,
>
> On Wed, Jun 27, 2018 at 4:32 PM Jose Abreu <Jose.Abreu@synopsys.com> wrote:
>> ++ SH Maintainers
>> ++ SH ML
>>
>> Hi SH Maintainers,
>>
>> On 27-06-2018 15:15, kbuild test robot wrote:
>>> Hi Jose,
>>>
>>> I love your patch! Perhaps something to improve:
>>>
>>> [auto build test WARNING on net-next/master]
>>>
>>> url:    https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_0day-2Dci_linux_commits_Jose-2DAbreu_net-2Dstmmac-2DAdd-2Dsupport-2Dfor-2DCBS-2DQDISC_20180627-2D214704&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=qW5_vbinGt0OnyDfQ2wtKdb2ZGzCcLwq6Fmlaki61xw&e=
>>> config: sh-allyesconfig (attached as .config)
>>> compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
>>> reproduce:
>>>         wget https://urldefense.proofpoint.com/v2/url?u=https-3A__raw.githubusercontent.com_intel_lkp-2Dtests_master_sbin_make.cross&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=4nNar4fgVZq0LjrPKIZP30nxVUY4yeu5QeyKbmlsu8A&e= -O ~/bin/make.cross
>>>         chmod +x ~/bin/make.cross
>>>         # save the attached .config to linux build tree
>>>         GCC_VERSION=7.2.0 make.cross ARCH=sh
>>>
>>> All warnings (new ones prefixed by >>):
>>>
>>>    In file included from ./arch/sh/include/generated/asm/div64.h:1:0,
>>>                     from include/linux/kernel.h:174,
>>>                     from include/linux/list.h:9,
>>>                     from include/linux/timer.h:5,
>>>                     from include/linux/workqueue.h:9,
>>>                     from include/net/pkt_cls.h:6,
>>>                     from drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:7:
>>>    drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_setup_cbs':
>>>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
>>>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
>>>                                ^
>>>>> drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:325:2: note: in expansion of macro 'do_div'
>>>      do_div(value, speed_div);
>>>      ^~~~~~
>>>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
>>>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
>> I'm not seeing the reason for this warning as I'm using a 64 bit
>> var. I guess the warning is appearing only because its signed
>> [see source bellow]. Is this not supported?
> No, the first parameter of do_div() must be u64, hence the check to
> enforce that.

Hmm ok. I guess I can use two variables then because the only
case value will be negative is with locredit and this does not
need the do_div.

Thanks for the clarification.

Thanks and Best Regards,
Jose Miguel Abreu

>
> Gr{oetje,eeting}s,
>
>                         Geert
>

^ permalink raw reply

* Re: [BUG] net: tg3: two possible data races
From: Siva Reddy Kallam @ 2018-06-27 14:46 UTC (permalink / raw)
  To: Jia-Ju Bai
  Cc: Prashant Sreedharan, Michael Chan, Linux Netdev List,
	Linux Kernel Mailing List

On Wed, Jun 27, 2018 at 7:17 AM, Jia-Ju Bai <baijiaju1990@gmail.com> wrote:
> The call paths in Linux 4.16.7 that may raise the first data race:
>
> CPU0:
> tg3_open
>     tg3_start
>         line 11611: spin_lock_bh()
>         tg3_enable_ints
>             line 1023: tp->tnapi->last_tag [READ]
>
> CPU1:
> tg3_poll
>     line 7341: tnapi->last_tag [WRITE]
>
> The READ operation in CPU0 is performed with holding a spinlock (line
> 11611), but the WRITE operation in CPU1 is performed without holding this
> spinlock, so it may cause a data race here.
> A possible fix may be to add spin_lock_bh() in tg3_poll().
>
> -----------------------------------------------------------------------
>
> The call paths in Linux 4.16.7 that may raise the second data race:
>
> CPU0:
> tg3_open
>     tg3_start
>         line 11611: spin_lock_bh()
>         tg3_enable_ints
>             line 1023: tp->irq_sync [WRITE]
>
> CPU1:
> tg3_interrupt_tagged
>     tg3_irq_sync
>         line 7341: tp->irq_sync [READ]
>
> The WRITE operation in CPU0 is performed with holding a spinlock (line
> 11611), but the READ operation in CPU1 is performed without holding this
> spinlock, so it may cause a data race here.
> A possible fix may be to add spin_lock_bh() in tg3_irq_sync().
>
> I am not sure that whether the possible fixes are correct, so I only report
> the data races.
>
>
> Best wishes,
> Jia-Ju Bai
>
>
We will evaluate this and provide an update on this.

^ permalink raw reply

* Re: [PATCH net-next] tcp: replace LINUX_MIB_TCPOFODROP with LINUX_MIB_TCPRMEMFULLDROP for drops due to receive buffer full
From: Eric Dumazet @ 2018-06-27 14:48 UTC (permalink / raw)
  To: Yafang Shao, edumazet, davem; +Cc: netdev, linux-kernel
In-Reply-To: <1530100216-13070-1-git-send-email-laoar.shao@gmail.com>



On 06/27/2018 04:50 AM, Yafang Shao wrote:
> When sk_rmem_alloc is larger than the receive buffer and we can't
> schedule more memory for it, the skb will be dropped.
> 
> In above situation, if this skb is put into the ofo queue,
> LINUX_MIB_TCPOFODROP is incremented to track it,
> while if this skb is put into the receive queue, there's no record.
> 
> So LINUX_MIB_TCPOFODROP is replaced with LINUX_MIB_TCPRMEMFULLDROP to track
> this behavior.


Hi Yafang

I do not want to remove TCPOFODrop and mix multiple causes in one single counter.

Please take a look at commit a6df1ae9383697c to have the reasoning.

Thanks.

^ permalink raw reply

* Re: [RFC PATCH v2 net-next 06/12] net: core: propagate SKB lists through packet_type lookup
From: Edward Cree @ 2018-06-27 14:49 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: linux-net-drivers, Network Development, David Miller
In-Reply-To: <CAF=yD-JANo1aq+pKqxzQi5g60Jc7Fkm75XJWpNHfhuzxFkK+iw@mail.gmail.com>

On 27/06/18 15:36, Willem de Bruijn wrote:
> On Tue, Jun 26, 2018 at 8:19 PM Edward Cree <ecree@solarflare.com> wrote:
>> __netif_receive_skb_taps() does a depressingly large amount of per-packet
>>  work that can't easily be listified, because the another_round looping
>>  makes it nontrivial to slice up into smaller functions.
>> Fortunately, most of that work disappears in the fast path:
>>  * Hardware devices generally don't have an rx_handler
>>  * Unless you're tcpdumping or something, there is usually only one ptype
>>  * VLAN processing comes before the protocol ptype lookup, so doesn't force
>>    a pt_prev deliver
>>  so normally, __netif_receive_skb_taps() will run straight through and return
>>  the one ptype found in ptype_base[hash of skb->protocol].
>>
>> Signed-off-by: Edward Cree <ecree@solarflare.com>
>> ---
>> -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
>> +static int __netif_receive_skb_taps(struct sk_buff *skb, bool pfmemalloc,
>> +                                   struct packet_type **pt_prev)
> A lot of code churn can be avoided by keeping local variable pt_prev and
> calling this ppt_prev or so, then assigning just before returning on success.
Good idea, I'll try that.

> Also, this function does more than just process network taps.
This is true, but naming things is hard, and I couldn't think of either a
 better new name for this function or a name that could fit in between
 __netif_receive_skb() and __netif_receive_skb_core() for the new function
 in my patch named __netif_receive_skb_core().  Any suggestions?

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for CBS QDISC
From: Geert Uytterhoeven @ 2018-06-27 14:51 UTC (permalink / raw)
  To: Jose.Abreu
  Cc: kbuild test robot, Linux-sh list, Rich Felker, Yoshinori Sato,
	kbuild-all, netdev, David S. Miller, Joao.Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <222b400d-bc25-5854-370a-84d0d9056932@synopsys.com>

Hi Jose,

On Wed, Jun 27, 2018 at 4:44 PM Jose Abreu <Jose.Abreu@synopsys.com> wrote:
> On 27-06-2018 15:36, Geert Uytterhoeven wrote:
> > On Wed, Jun 27, 2018 at 4:32 PM Jose Abreu <Jose.Abreu@synopsys.com> wrote:
> >> ++ SH Maintainers
> >> ++ SH ML
> >>
> >> Hi SH Maintainers,
> >>
> >> On 27-06-2018 15:15, kbuild test robot wrote:
> >>> Hi Jose,
> >>>
> >>> I love your patch! Perhaps something to improve:
> >>>
> >>> [auto build test WARNING on net-next/master]
> >>>
> >>> url:    https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_0day-2Dci_linux_commits_Jose-2DAbreu_net-2Dstmmac-2DAdd-2Dsupport-2Dfor-2DCBS-2DQDISC_20180627-2D214704&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=qW5_vbinGt0OnyDfQ2wtKdb2ZGzCcLwq6Fmlaki61xw&e=
> >>> config: sh-allyesconfig (attached as .config)
> >>> compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
> >>> reproduce:
> >>>         wget https://urldefense.proofpoint.com/v2/url?u=https-3A__raw.githubusercontent.com_intel_lkp-2Dtests_master_sbin_make.cross&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=WHDsc6kcWAl4i96Vm5hJ_19IJiuxx_p_Rzo2g-uHDKw&m=YIae3rrFzqYg_5b4eVIAXuyvT_EV0vKACS25rJugux8&s=4nNar4fgVZq0LjrPKIZP30nxVUY4yeu5QeyKbmlsu8A&e= -O ~/bin/make.cross
> >>>         chmod +x ~/bin/make.cross
> >>>         # save the attached .config to linux build tree
> >>>         GCC_VERSION=7.2.0 make.cross ARCH=sh
> >>>
> >>> All warnings (new ones prefixed by >>):
> >>>
> >>>    In file included from ./arch/sh/include/generated/asm/div64.h:1:0,
> >>>                     from include/linux/kernel.h:174,
> >>>                     from include/linux/list.h:9,
> >>>                     from include/linux/timer.h:5,
> >>>                     from include/linux/workqueue.h:9,
> >>>                     from include/net/pkt_cls.h:6,
> >>>                     from drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:7:
> >>>    drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c: In function 'tc_setup_cbs':
> >>>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
> >>>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> >>>                                ^
> >>>>> drivers/net//ethernet/stmicro/stmmac/stmmac_tc.c:325:2: note: in expansion of macro 'do_div'
> >>>      do_div(value, speed_div);
> >>>      ^~~~~~
> >>>    include/asm-generic/div64.h:222:28: warning: comparison of distinct pointer types lacks a cast
> >>>      (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> >> I'm not seeing the reason for this warning as I'm using a 64 bit
> >> var. I guess the warning is appearing only because its signed
> >> [see source bellow]. Is this not supported?
> > No, the first parameter of do_div() must be u64, hence the check to
> > enforce that.
>
> Hmm ok. I guess I can use two variables then because the only
> case value will be negative is with locredit and this does not
> need the do_div.

You can use div_s64_rem() or div_s64() (in include/linux/math64.h).

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox