Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH 1/1] crypto:chelsio: Update ntx queue received from cxgb4
From: Lino Sanfilippo @ 2018-10-13  0:16 UTC (permalink / raw)
  To: Harsh Jain, herbert, atul.gupta, indranil, swise, varun, ganeshgr,
	netdev, linux-crypto
In-Reply-To: <64a8109a1c4ce9bf53797dc3dfb8deb64ceef4e2.1539343454.git.harsh@chelsio.com>

Hi,

> +	if (uld_type == CXGB4_ULD_CRYPTO) {
> +		i = min_t(int, adap->vres.ncrypto_fc,
> +			  num_online_cpus());
> +		txq_info->ntxq = rounddown(i, adap->params.nports);
> +		if (txq_info->ntxq <= 0) {
> +			dev_warn(adap->pdev_dev, "Crypto Tx Queues can't be zero\n");
> +			return -EINVAL;
> +		}

Shouldn't we free txq_info in the error case?

Regards,
Lino

^ permalink raw reply

* Re: [PATCH] netfilter: x_tables: fix missing unlock if table init fails
From: Omar Sandoval @ 2018-10-12 23:35 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal
  Cc: David S. Miller, netfilter-devel, coreteam, netdev, kernel-team
In-Reply-To: <32ddddccb89d8be6a73cd2de2fb20283b2ef6a94.1539385788.git.osandov@fb.com>

On Fri, Oct 12, 2018 at 04:16:53PM -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> Commit 7dde07e9c536 ("netfilter: x_tables: unlock on error in
> xt_find_table_lock()") fixed one missing unlock in xt_find_table_lock(),
> but we're also missing one if t->table_init() fails.
> 
> Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default")
> Signed-off-by: Omar Sandoval <osandov@fb.com>

Whoops, please ignore this, I missed the unlock right before the
table_init call...

^ permalink raw reply

* [PATCH] netfilter: x_tables: fix missing unlock if table init fails
From: Omar Sandoval @ 2018-10-12 23:16 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal
  Cc: David S. Miller, netfilter-devel, coreteam, netdev, kernel-team

From: Omar Sandoval <osandov@fb.com>

Commit 7dde07e9c536 ("netfilter: x_tables: unlock on error in
xt_find_table_lock()") fixed one missing unlock in xt_find_table_lock(),
but we're also missing one if t->table_init() fails.

Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default")
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 net/netfilter/x_tables.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index aecadd471e1d..2a0106e6305e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1207,28 +1207,31 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
 {
 	struct xt_table *t, *found = NULL;
+	int err;
 
 	mutex_lock(&xt[af].mutex);
 	list_for_each_entry(t, &net->xt.tables[af], list)
 		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
 			return t;
 
-	if (net == &init_net)
+	if (net == &init_net) {
+		err = -ENOENT;
 		goto out;
+	}
 
 	/* Table doesn't exist in this netns, re-try init */
 	list_for_each_entry(t, &init_net.xt.tables[af], list) {
-		int err;
-
 		if (strcmp(t->name, name))
 			continue;
-		if (!try_module_get(t->me))
+		if (!try_module_get(t->me)) {
+			err = -ENOENT;
 			goto out;
+		}
 		mutex_unlock(&xt[af].mutex);
 		err = t->table_init(net);
 		if (err < 0) {
 			module_put(t->me);
-			return ERR_PTR(err);
+			goto out;
 		}
 
 		found = t;
@@ -1237,8 +1240,10 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 		break;
 	}
 
-	if (!found)
+	if (!found) {
+		err = -ENOENT;
 		goto out;
+	}
 
 	/* and once again: */
 	list_for_each_entry(t, &net->xt.tables[af], list)
@@ -1248,7 +1253,7 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 	module_put(found->me);
  out:
 	mutex_unlock(&xt[af].mutex);
-	return ERR_PTR(-ENOENT);
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(xt_find_table_lock);
 
-- 
2.19.1

^ permalink raw reply related

* [PATCH net-next 2/2] selftests: pmtu: Add optional traffic captures for single tests
From: Stefano Brivio @ 2018-10-12 21:54 UTC (permalink / raw)
  To: David S. Miller; +Cc: Sabrina Dubroca, netdev
In-Reply-To: <cover.1539361090.git.sbrivio@redhat.com>

If --trace is passed as an option and tcpdump is available,
capture traffic for all relevant interfaces to per-test pcap
files named <test>_<interface>.pcap.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
---
 tools/testing/selftests/net/pmtu.sh | 60 +++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 8278a24f5ba6..ed549c03d2a7 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -142,6 +142,7 @@ dummy6_mask="64"
 
 cleanup_done=1
 err_buf=
+tcpdump_pids=
 
 err() {
 	err_buf="${err_buf}${1}
@@ -284,7 +285,24 @@ setup() {
 	done
 }
 
+trace() {
+	[ $tracing -eq 0 ] && return
+
+	for arg do
+		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
+		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+		tcpdump_pids="${tcpdump_pids} $!"
+		ns_cmd=
+	done
+	sleep 1
+}
+
 cleanup() {
+	for pid in ${tcpdump_pids}; do
+		kill ${pid}
+	done
+	tcpdump_pids=
+
 	[ ${cleanup_done} -eq 1 ] && return
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
@@ -357,6 +375,10 @@ test_pmtu_ipvX() {
 	family=${1}
 
 	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
 
 	if [ ${family} -eq 4 ]; then
 		ping=ping
@@ -445,6 +467,8 @@ test_pmtu_ipv6_exception() {
 
 test_pmtu_vti4_exception() {
 	setup namespaces veth vti4 xfrm4 || return 2
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
 
 	veth_mtu=1500
 	vti_mtu=$((veth_mtu - 20))
@@ -473,6 +497,8 @@ test_pmtu_vti4_exception() {
 
 test_pmtu_vti6_exception() {
 	setup namespaces veth vti6 xfrm6 || return 2
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
 	fail=0
 
 	# Create route exception by exceeding link layer MTU
@@ -643,29 +669,49 @@ test_pmtu_vti6_link_change_mtu() {
 
 usage() {
 	echo
-	echo "$0 [TEST]..."
+	echo "$0 [OPTIONS] [TEST]..."
 	echo "If no TEST argument is given, all tests will be run."
 	echo
+	echo "Options"
+	echo "  --trace: capture traffic to TEST_INTERFACE.pcap"
+	echo
 	echo "Available tests${tests}"
 	exit 1
 }
 
+exitcode=0
+desc=0
+IFS="	
+"
+
+tracing=0
 for arg do
-	# Check first that all requested tests are available before running any
-	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+	if [ "${arg}" != "${arg#--*}" ]; then
+		opt="${arg#--}"
+		if [ "${opt}" = "trace" ]; then
+			if which tcpdump > /dev/null 2>&1; then
+				tracing=1
+			else
+				echo "=== tcpdump not available, tracing disabled"
+			fi
+		else
+			usage
+		fi
+	else
+		# Check first that all requested tests are available before
+		# running any
+		command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+	fi
 done
 
 trap cleanup EXIT
 
-exitcode=0
-desc=0
-IFS="	
-"
 for t in ${tests}; do
 	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
 
 	run_this=1
 	for arg do
+		[ "${arg}" != "${arg#--*}" ] && continue
 		[ "${arg}" = "${name}" ] && run_this=1 && break
 		run_this=0
 	done
-- 
2.19.1

^ permalink raw reply related

* [PATCH net-next 1/2] selftests: pmtu: Allow selection of single tests
From: Stefano Brivio @ 2018-10-12 21:54 UTC (permalink / raw)
  To: David S. Miller; +Cc: Sabrina Dubroca, netdev
In-Reply-To: <cover.1539361090.git.sbrivio@redhat.com>

As the number of tests is growing, it's quite convenient to allow
single tests to be run.

Display usage when the script is run with any invalid argument,
keep existing semantics when no arguments are passed so that
automated runs won't break.

Instead of just looping on the list of requested tests, if any,
check first that they exist, and go through them in a nested
loop to keep the existing way to display test descriptions.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
---
 tools/testing/selftests/net/pmtu.sh | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index b9cdb68df4c5..8278a24f5ba6 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -641,6 +641,20 @@ test_pmtu_vti6_link_change_mtu() {
 	return ${fail}
 }

+usage() {
+	echo
+	echo "$0 [TEST]..."
+	echo "If no TEST argument is given, all tests will be run."
+	echo
+	echo "Available tests${tests}"
+	exit 1
+}
+
+for arg do
+	# Check first that all requested tests are available before running any
+	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+done
+
 trap cleanup EXIT

 exitcode=0
@@ -650,6 +664,13 @@ IFS="
 for t in ${tests}; do
 	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0

+	run_this=1
+	for arg do
+		[ "${arg}" = "${name}" ] && run_this=1 && break
+		run_this=0
+	done
+	[ $run_this -eq 0 ] && continue
+
 	(
 		unset IFS
 		eval test_${name}
-- 
2.19.1

^ permalink raw reply related

* [PATCH net-next 0/2] selftests: pmtu: Add test choice and captures
From: Stefano Brivio @ 2018-10-12 21:54 UTC (permalink / raw)
  To: David S. Miller; +Cc: Sabrina Dubroca, netdev

This series adds a couple of features useful for debugging: 1/2
allows selecting single tests and 2/2 adds optional traffic
captures.

Semantics for current invocation of test script are preserved.

Stefano Brivio (2):
  selftests: pmtu: Allow selection of single tests
  selftests: pmtu: Add optional traffic captures for single tests

 tools/testing/selftests/net/pmtu.sh | 69 ++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

-- 
2.19.1

^ permalink raw reply

* [PATCH net 2/2] geneve, vxlan: Don't set exceptions if skb->len < mtu
From: Stefano Brivio @ 2018-10-12 21:53 UTC (permalink / raw)
  To: David S. Miller; +Cc: Xin Long, Sabrina Dubroca, netdev
In-Reply-To: <cover.1539381018.git.sbrivio@redhat.com>

We shouldn't abuse exceptions: if the destination MTU is already higher
than what we're transmitting, no exception should be created.

Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
---
 drivers/net/geneve.c |  7 +++----
 drivers/net/vxlan.c  |  4 ++--
 include/net/dst.h    | 10 ++++++++++
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 61c4bfbeb41c..493cd382b8aa 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -830,8 +830,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
-	skb_dst_update_pmtu(skb, dst_mtu(&rt->dst) -
-				 GENEVE_IPV4_HLEN - info->options_len);
+	skb_tunnel_check_pmtu(skb, &rt->dst,
+			      GENEVE_IPV4_HLEN + info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
@@ -872,8 +872,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
-	skb_dst_update_pmtu(skb, dst_mtu(dst) -
-				 GENEVE_IPV6_HLEN - info->options_len);
+	skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 22e0ce592e07..27bd586b94b0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2194,7 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ndst = &rt->dst;
-		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN_HEADROOM);
+		skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2231,7 +2231,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				goto out_unlock;
 		}
 
-		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN6_HEADROOM);
+		skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
diff --git a/include/net/dst.h b/include/net/dst.h
index 7f735e76ca73..6cf0870414c7 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
 		dst->ops->update_pmtu(dst, NULL, skb, mtu);
 }
 
+static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+					 struct dst_entry *encap_dst,
+					 int headroom)
+{
+	u32 encap_mtu = dst_mtu(encap_dst);
+
+	if (skb->len > encap_mtu - headroom)
+		skb_dst_update_pmtu(skb, encap_mtu - headroom);
+}
+
 #endif /* _NET_DST_H */
-- 
2.19.1

^ permalink raw reply related

* [PATCH net 1/2] geneve, vxlan: Don't check skb_dst() twice
From: Stefano Brivio @ 2018-10-12 21:53 UTC (permalink / raw)
  To: David S. Miller; +Cc: Xin Long, Sabrina Dubroca, netdev
In-Reply-To: <cover.1539381018.git.sbrivio@redhat.com>

Commit f15ca723c1eb ("net: don't call update_pmtu unconditionally") keeps
us from updating the PMTU for a non-existent destination, but didn't clean
up cases where the check was already explicit. Drop those redundant checks.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
---
 drivers/net/geneve.c | 15 ++++-----------
 drivers/net/vxlan.c  | 12 ++----------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 6acb6b5718b9..61c4bfbeb41c 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -830,12 +830,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
-	if (skb_dst(skb)) {
-		int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN -
-			  info->options_len;
-
-		skb_dst_update_pmtu(skb, mtu);
-	}
+	skb_dst_update_pmtu(skb, dst_mtu(&rt->dst) -
+				 GENEVE_IPV4_HLEN - info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
@@ -876,11 +872,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
-	if (skb_dst(skb)) {
-		int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len;
-
-		skb_dst_update_pmtu(skb, mtu);
-	}
+	skb_dst_update_pmtu(skb, dst_mtu(dst) -
+				 GENEVE_IPV6_HLEN - info->options_len);
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
 	if (geneve->collect_md) {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 2b8da2b7e721..22e0ce592e07 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2194,11 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ndst = &rt->dst;
-		if (skb_dst(skb)) {
-			int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
-
-			skb_dst_update_pmtu(skb, mtu);
-		}
+		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2235,11 +2231,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 				goto out_unlock;
 		}
 
-		if (skb_dst(skb)) {
-			int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
-
-			skb_dst_update_pmtu(skb, mtu);
-		}
+		skb_dst_update_pmtu(skb, dst_mtu(ndst) - VXLAN6_HEADROOM);
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
-- 
2.19.1

^ permalink raw reply related

* [PATCH net 0/2] geneve, vxlan: Don't set exceptions if skb->len < mtu
From: Stefano Brivio @ 2018-10-12 21:53 UTC (permalink / raw)
  To: David S. Miller; +Cc: Xin Long, Sabrina Dubroca, netdev

This series fixes the exception abuse described in 2/2, and 1/2
is just a preparatory change to make 2/2 less ugly.

Stefano Brivio (2):
  geneve, vxlan: Don't check skb_dst() twice
  geneve, vxlan: Don't set exceptions if skb->len < mtu

 drivers/net/geneve.c | 14 +++-----------
 drivers/net/vxlan.c  | 12 ++----------
 include/net/dst.h    | 10 ++++++++++
 3 files changed, 15 insertions(+), 21 deletions(-)

-- 
2.19.1

^ permalink raw reply

* Re: [PATCH bpf-next 4/8] tls: convert to generic sk_msg interface
From: Daniel Borkmann @ 2018-10-12 21:51 UTC (permalink / raw)
  To: Dave Watson
  Cc: alexei.starovoitov@gmail.com, john.fastabend@gmail.com,
	netdev@vger.kernel.org
In-Reply-To: <20181012201612.zghznihoqrpt32tg@mohitraman-mbp.dhcp.thefacebook.com>

On 10/12/2018 10:16 PM, Dave Watson wrote:
> On 10/11/18 02:45 AM, Daniel Borkmann wrote:
>> Convert kTLS over to make use of sk_msg interface for plaintext and
>> encrypted scattergather data, so it reuses all the sk_msg helpers
>> and data structure which later on in a second step enables to glue
>> this to BPF.
> 
> Looks very clean, thanks!

Thanks, it indeed allows for getting rid of quite a bit of open coded
code by converting to sk_msg API. As it was mentioned in the other mail,
we'd also be able to reuse this framework in future for other potential
additions or ULPs aside from that; consolidating sockmap and tls to work
on the same data structure also helped a lot in testing.

>> -static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
>> -			      int length, int *pages_used,
>> -			      unsigned int *size_used,
>> -			      struct scatterlist *to, int to_max_pages,
>> -			      bool charge)
>> -{
> 
> ...
> 
>> -			err = zerocopy_from_iter(sk, out_iov, data_len, &pages,
>> -						 chunk, &sgout[1],
>> -						 (n_sgout - 1), false);
>> +			err = tls_setup_from_iter(sk, out_iov, data_len,
>> +						  &pages, chunk, &sgout[1],
>> +						  (n_sgout - 1));
> 
> Any reason not to add the 'bool charge' to sk_msg_zerocopy_from_iter?
> Then tls_setup_from_iter is not necessary.

I left this bit aside for now by keeping tls_setup_from_iter() as is: it is
basically the current zerocopy_from_iter() from the tls code, minus the
charge, since that is not used here. Given this is only triggered in the RX
path (which is not sk_msg based right now), I didn't want to wrap it into a
fake/temp sk_msg object just for calling into sk_msg_zerocopy_from_iter();
that felt a bit unclean, and given the complexity we already have, it is
probably more appropriate to pursue in a second step.

Thanks,
Daniel

^ permalink raw reply

* [PATCH bpf-next] bpf: Fix dev pointer dereference from sk_skb
From: Joe Stringer @ 2018-10-12 21:50 UTC (permalink / raw)
  To: daniel; +Cc: netdev, dan.carpenter, ast

Dan Carpenter reports:

The patch 6acc9b432e67: "bpf: Add helper to retrieve socket in BPF"
from Oct 2, 2018, leads to the following Smatch complaint:

    net/core/filter.c:4893 bpf_sk_lookup()
    error: we previously assumed 'skb->dev' could be null (see line 4885)

Fix this issue by checking skb->dev before using it.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
---
 net/core/filter.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 4bbc6567fcb8..b844761b5d4c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4821,9 +4821,12 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			      struct sk_buff *skb, u8 family, u8 proto)
 {
-	int dif = skb->dev->ifindex;
 	bool refcounted = false;
 	struct sock *sk = NULL;
+	int dif = 0;
+
+	if (skb->dev)
+		dif = skb->dev->ifindex;
 
 	if (family == AF_INET) {
 		__be32 src4 = tuple->ipv4.saddr;
-- 
2.17.1

^ permalink raw reply related

* [PATCH net-next] r8169: remove unneeded call to netif_stop_queue in rtl8169_net_suspend
From: Heiner Kallweit @ 2018-10-12 21:30 UTC (permalink / raw)
  To: David Miller, Realtek linux nic maintainers; +Cc: netdev@vger.kernel.org

netif_device_detach() stops all tx queues already, so we don't need
this call.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8169.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index e6e1790f4..a78be5937 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6825,7 +6825,6 @@ static void rtl8169_net_suspend(struct net_device *dev)
 
 	phy_stop(dev->phydev);
 	netif_device_detach(dev);
-	netif_stop_queue(dev);
 
 	rtl_lock_work(tp);
 	napi_disable(&tp->napi);
-- 
2.19.1

^ permalink raw reply related

* [PATCH net-next] r8169: simplify rtl8169_set_magic_reg
From: Heiner Kallweit @ 2018-10-12 21:23 UTC (permalink / raw)
  To: David Miller, Realtek linux nic maintainers; +Cc: netdev@vger.kernel.org

Simplify this function, no functional change intended.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/ethernet/realtek/r8169.c | 32 +++++++++++-----------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 7d3f671e1..e6e1790f4 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4553,27 +4553,19 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 
 static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
 {
-	static const struct rtl_cfg2_info {
-		u32 mac_version;
-		u32 clk;
-		u32 val;
-	} cfg2_info [] = {
-		{ RTL_GIGA_MAC_VER_05, PCI_Clock_33MHz, 0x000fff00 }, // 8110SCd
-		{ RTL_GIGA_MAC_VER_05, PCI_Clock_66MHz, 0x000fffff },
-		{ RTL_GIGA_MAC_VER_06, PCI_Clock_33MHz, 0x00ffff00 }, // 8110SCe
-		{ RTL_GIGA_MAC_VER_06, PCI_Clock_66MHz, 0x00ffffff }
-	};
-	const struct rtl_cfg2_info *p = cfg2_info;
-	unsigned int i;
-	u32 clk;
+	u32 val;
 
-	clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
-	for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
-		if ((p->mac_version == mac_version) && (p->clk == clk)) {
-			RTL_W32(tp, 0x7c, p->val);
-			break;
-		}
-	}
+	if (tp->mac_version == RTL_GIGA_MAC_VER_05)
+		val = 0x000fff00;
+	else if (tp->mac_version == RTL_GIGA_MAC_VER_06)
+		val = 0x00ffff00;
+	else
+		return;
+
+	if (RTL_R8(tp, Config2) & PCI_Clock_66MHz)
+		val |= 0xff;
+
+	RTL_W32(tp, 0x7c, val);
 }
 
 static void rtl_set_rx_mode(struct net_device *dev)
-- 
2.19.1

^ permalink raw reply related

* Re: [PATCH net] net/sched: properly init chain in case of multiple control actions
From: Cong Wang @ 2018-10-12 20:57 UTC (permalink / raw)
  To: Davide Caratti
  Cc: Jiri Pirko, Jamal Hadi Salim, David Miller,
	Linux Kernel Network Developers
In-Reply-To: <f08cf807a1b7d33a7cc3df3889cfde398ccbd152.1539376452.git.dcaratti@redhat.com>

On Fri, Oct 12, 2018 at 1:39 PM Davide Caratti <dcaratti@redhat.com> wrote:
> Several TC actions allow users to specify a fallback control action, that
> is usually stored in the action private data. 'goto chain x' never worked
> for that case, because the action handler was never initialized. There is
> only one 'goto_chain' handle per action: extend act_api to disallow 'goto
> chain' specified more than once in a rule. If the fallback control action
> is legally configured, use it to properly initialize the chain.

Why not just validate the fallback action in each action init()?
For example, checking tcfg_paction in tcf_gact_init().

I don't see the need of making it generic.

^ permalink raw reply

* [net  1/1] tipc: fix unsafe rcu locking when accessing publication list
From: Jon Maloy @ 2018-10-12 20:46 UTC (permalink / raw)
  To: davem, netdev
  Cc: gordan.mihaljevic, tung.q.nguyen, hoang.h.le, jon.maloy,
	canh.d.luu, ying.xue, tipc-discussion

From: Tung Nguyen <tung.q.nguyen@dektech.com.au>

The binding table's 'cluster_scope' list is rcu protected to handle
races between threads changing the list and those traversing the list at
the same moment. We have now found that the function named_distribute()
uses the regular list_for_each() macro to traverse the said list.
Likewise, the function tipc_named_withdraw() is removing items from the
same list using the regular list_del() call. When these two functions
execute in parallel we see occasional crashes.

This commit fixes this by adding the missing _rcu() suffixes.

Signed-off-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
---
 net/tipc/name_distr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 51b4b96..3cfeb9d 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
 	struct sk_buff *buf;
 	struct distr_item *item;
 
-	list_del(&publ->binding_node);
+	list_del_rcu(&publ->binding_node);
 
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
 			ITEM_SIZE) * ITEM_SIZE;
 	u32 msg_rem = msg_dsz;
 
-	list_for_each_entry(publ, pls, binding_node) {
+	list_for_each_entry_rcu(publ, pls, binding_node) {
 		/* Prepare next buffer: */
 		if (!skb) {
 			skb = named_prepare_buf(net, PUBLICATION, msg_rem,
-- 
2.1.4

^ permalink raw reply related

* Re: [PATCH V2 net-next 5/5] ptp: Add a driver for InES time stamping IP core.
From: Rob Herring @ 2018-10-12 20:42 UTC (permalink / raw)
  To: Richard Cochran
  Cc: netdev, devicetree, Andrew Lunn, David Miller, Florian Fainelli,
	Jacob Keller, Mark Rutland, Miroslav Lichvar, Willem de Bruijn
In-Reply-To: <20181007173823.21590-6-richardcochran@gmail.com>

On Sun, Oct 07, 2018 at 10:38:23AM -0700, Richard Cochran wrote:
> The InES at the ZHAW offers a PTP time stamping IP core.  The FPGA
> logic recognizes and time stamps PTP frames on the MII bus.  This
> patch adds a driver for the core along with a device tree binding to
> allow hooking the driver to MII buses.
> 
> Signed-off-by: Richard Cochran <richardcochran@gmail.com>
> ---
>  Documentation/devicetree/bindings/ptp/ptp-ines.txt |  37 +

Bindings should be a separate patch.

>  drivers/ptp/Kconfig                                |  10 +
>  drivers/ptp/Makefile                               |   1 +
>  drivers/ptp/ptp_ines.c                             | 870 +++++++++++++++++++++
>  4 files changed, 918 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/ptp/ptp-ines.txt
>  create mode 100644 drivers/ptp/ptp_ines.c
> 
> diff --git a/Documentation/devicetree/bindings/ptp/ptp-ines.txt b/Documentation/devicetree/bindings/ptp/ptp-ines.txt
> new file mode 100644
> index 000000000000..1484b62802c7
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/ptp/ptp-ines.txt
> @@ -0,0 +1,37 @@
> +ZHAW InES PTP time stamping IP core
> +
> +The IP core needs two different kinds of nodes.  The control node
> +lives somewhere in the memory map and specifies the address of the
> +control registers.  There can be up to three port handles placed as
> +attributes of PHY nodes.  These associate a particular MII bus with a
> +port index within the IP core.
> +
> +Required properties of the control node:
> +
> +- compatible:		"ines,ptp-ctrl"

'ines' is not a registered vendor prefix. Should it be 'zhaw' instead?

> +- reg:			physical address and size of the register bank
> +- #phandle-cells:	must be one (1)

#timestamper-cells

Or if it is always 1, you could omit it.

> +
> +Required format of the port handle within the PHY node:
> +
> +- timestamper:		provides control node reference and
> +			the port channel within the IP core

This and #timestamper-cells need to be in a common binding doc.

And bonus points if you add a check in dtc for this. Should be a 
one-liner.

> +
> +Example:
> +
> +	tstamper: timestamper@60000000 {
> +		compatible = "ines,ptp-ctrl";
> +		reg = <0x60000000 0x80>;
> +		#phandle-cells = <1>;
> +	};
> +
> +	ethernet@80000000 {
> +		...
> +		mdio {
> +			...
> +			phy@3 {
> +				...
> +				timestamper = <&tstamper 0>;
> +			};
> +		};
> +	};

^ permalink raw reply

* [PATCH net] net/sched: properly init chain in case of multiple control actions
From: Davide Caratti @ 2018-10-12 20:39 UTC (permalink / raw)
  To: Jiri Pirko, Cong Wang, Jamal Hadi Salim, David S. Miller, netdev

the following script:

 # tc f a dev v0 egress chain 4 matchall action simple sdata "A triumph!"
 # tc f a dev v0 egress matchall action pass random determ goto chain 4 5

produces the following crash:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 PGD 0 P4D 0
 Oops: 0000 [#1] SMP PTI
 CPU: 9 PID: 0 Comm: swapper/9 Not tainted 4.19.0-rc6.chainfix + #472
 Hardware name: Supermicro SYS-6027R-72RF/X9DRH-7TF/7F/iTF/iF, BIOS 3.0  07/26/2013
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffff9af96f843bf8 EFLAGS: 00010246
 RAX: 000000002000002a RBX: ffff9af9679cf200 RCX: 000000000000005a
 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff9af585e006c0
 RBP: ffff9af96f843ca0 R08: 0000000016000000 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9af968db4400
 R13: ffff9af968db4408 R14: 0000000000000001 R15: ffff9af585e006c0
 FS:  0000000000000000(0000) GS:ffff9af96f840000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 000000025980a001 CR4: 00000000001606e0
 Call Trace:
  <IRQ>
  tcf_classify+0x89/0x140
  __dev_queue_xmit+0x413/0x8a0
  ? ip6_finish_output2+0x336/0x520
  ip6_finish_output2+0x336/0x520
  ? ip6_output+0x68/0x110
  ip6_output+0x68/0x110
  ? ip6_fragment+0x9e0/0x9e0
  mld_sendpack+0x175/0x220
  ? mld_gq_timer_expire+0x40/0x40
  mld_dad_timer_expire+0x25/0x80
  call_timer_fn+0x2b/0x120
  run_timer_softirq+0x3e8/0x440
  ? tick_sched_timer+0x37/0x70
  ? __hrtimer_run_queues+0x118/0x290
  __do_softirq+0xe3/0x2bd
  irq_exit+0xe3/0xf0
  smp_apic_timer_interrupt+0x74/0x130
  apic_timer_interrupt+0xf/0x20
  </IRQ>
 RIP: 0010:cpuidle_enter_state+0xa5/0x320
 Code: 71 82 5f 7e e8 bc 25 ab ff 48 89 c3 0f 1f 44 00 00 31 ff e8 3d 36 ab ff 80 7c 24 07 00 0f 85 28 02 00 00 fb 66 0f 1f 44 00 00 <4c> 29 f3 48 ba cf f7 53 e3 a5 9b c4 20 48 89 d8 48 c1 fb 3f 48 f7
 RSP: 0018:ffffafa1832cbe90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
 RAX: ffff9af96f862600 RBX: 0000003ede349ac5 RCX: 000000000000001f
 RDX: 0000003ede349ac5 RSI: 00000000313b14ef RDI: 0000000000000000
 RBP: ffffcfa17fa40a00 R08: ffff9af96f85cdc0 R09: 000000000000afc8
 R10: ffffafa1832cbe70 R11: 000000000000afc8 R12: 0000000000000004
 R13: ffffffff82578bd8 R14: 0000003ec085dc50 R15: 0000000000000000
  do_idle+0x200/0x280
  cpu_startup_entry+0x6f/0x80
  start_secondary+0x1a7/0x200
  secondary_startup_64+0xa4/0xb0
 Modules linked in: act_gact act_simple cls_matchall sch_ingress veth intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ipmi_ssif ghash_clmulni_intel pcbc aesni_intel ipmi_si iTCO_wdt crypto_simd iTCO_vendor_support cryptd mei_me ipmi_devintf glue_helper mei joydev ipmi_msghandler pcc_cpufreq sg lpc_ich pcspkr i2c_i801 ioatdma wmi ip_tables xfs libcrc32c mlx4_en sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm isci drm libsas igb ahci libahci scsi_transport_sas mlx4_core be2net crc32c_intel dca libata i2c_algo_bit i2c_core megaraid_sas devlink dm_mirror dm_region_hash dm_log dm_mod
 CR2: 0000000000000000

Several TC actions allow users to specify a fallback control action, that
is usually stored in the action private data. 'goto chain x' never worked
for that case, because the action handler was never initialized. There is
only one 'goto_chain' handle per action: extend act_api to disallow 'goto
chain' specified more than once in a rule. If the fallback control action
is legally configured, use it to properly initialize the chain.

Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 include/net/act_api.h  |  1 +
 net/sched/act_api.c    | 28 +++++++++++++++++++++++-----
 net/sched/act_gact.c   |  8 ++++++++
 net/sched/act_police.c | 13 +++++++++++++
 4 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 970303448c90..efc2309a6545 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -99,6 +99,7 @@ struct tc_action_ops {
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 	void	(*put_dev)(struct net_device *dev);
+	int	(*fallback_act)(const struct tc_action *a);
 };
 
 struct tc_action_net {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e12f8ef7baa4..3eaa61abf190 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -30,10 +30,9 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
+static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp,
+				      u32 chain_index)
 {
-	u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
-
 	if (!tp)
 		return -EINVAL;
 	a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
@@ -798,7 +797,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	struct tc_cookie *cookie = NULL;
 	char act_name[IFNAMSIZ];
 	struct nlattr *tb[TCA_ACT_MAX + 1];
+	bool do_init_chain = false;
 	struct nlattr *kind;
+	u32 chain_id;
 	int err;
 
 	if (name == NULL) {
@@ -886,7 +887,23 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		module_put(a_o->owner);
 
 	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
-		err = tcf_action_goto_chain_init(a, tp);
+		do_init_chain = true;
+		chain_id = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
+		if (a_o->fallback_act && TC_ACT_EXT_CMP(a_o->fallback_act(a),
+							TC_ACT_GOTO_CHAIN)) {
+			NL_SET_ERR_MSG(extack, "Too many 'goto chain'");
+			return ERR_PTR(-EINVAL);
+		}
+	} else if (a_o->fallback_act) {
+		chain_id = a_o->fallback_act(a);
+		if (TC_ACT_EXT_CMP(chain_id, TC_ACT_GOTO_CHAIN)) {
+			do_init_chain = true;
+			chain_id &= TC_ACT_EXT_VAL_MASK;
+		}
+	}
+
+	if (do_init_chain) {
+		err = tcf_action_goto_chain_init(a, tp, chain_id);
 		if (err) {
 			tcf_action_destroy_1(a, bind);
 			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
@@ -894,7 +911,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		}
 	}
 
-	if (!tcf_action_valid(a->tcfa_action)) {
+	if (!tcf_action_valid(a->tcfa_action) ||
+	    (a_o->fallback_act && !tcf_action_valid(a_o->fallback_act(a)))) {
 		tcf_action_destroy_1(a, bind);
 		NL_SET_ERR_MSG(extack, "Invalid control action value");
 		return ERR_PTR(-EINVAL);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index cd1d9bd32ef9..77554e87d658 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -47,6 +47,11 @@ static int gact_determ(struct tcf_gact *gact)
 
 typedef int (*g_rand)(struct tcf_gact *gact);
 static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
+
+static int tcf_gact_fallback_action(const struct tc_action *act)
+{
+	return to_gact(act)->tcfg_paction;
+}
 #endif /* CONFIG_GACT_PROB */
 
 static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -254,6 +259,9 @@ static struct tc_action_ops act_gact_ops = {
 	.walk		=	tcf_gact_walker,
 	.lookup		=	tcf_gact_search,
 	.get_fill_size	=	tcf_gact_get_fill_size,
+#ifdef CONFIG_GACT_PROB
+	.fallback_act	=	tcf_gact_fallback_action,
+#endif
 	.size		=	sizeof(struct tcf_gact),
 };
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 5d8bfa878477..03ecb063c415 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -320,6 +320,18 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_police_fallback_action(const struct tc_action *a)
+{
+	struct tcf_police *police = to_police(a);
+	int retval;
+
+	spin_lock_bh(&police->tcf_lock);
+	retval =  police->tcfp_result;
+	spin_unlock_bh(&police->tcf_lock);
+
+	return retval;
+}
+
 MODULE_AUTHOR("Alexey Kuznetsov");
 MODULE_DESCRIPTION("Policing actions");
 MODULE_LICENSE("GPL");
@@ -333,6 +345,7 @@ static struct tc_action_ops act_police_ops = {
 	.init		=	tcf_police_init,
 	.walk		=	tcf_police_walker,
 	.lookup		=	tcf_police_search,
+	.fallback_act	=	tcf_police_fallback_action,
 	.size		=	sizeof(struct tcf_police),
 };
 
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH bpf-next 4/8] tls: convert to generic sk_msg interface
From: Dave Watson @ 2018-10-12 20:16 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: alexei.starovoitov@gmail.com, john.fastabend@gmail.com,
	netdev@vger.kernel.org
In-Reply-To: <20181011004547.16662-5-daniel@iogearbox.net>

On 10/11/18 02:45 AM, Daniel Borkmann wrote:
> Convert kTLS over to make use of sk_msg interface for plaintext and
> encrypted scattergather data, so it reuses all the sk_msg helpers
> and data structure which later on in a second step enables to glue
> this to BPF.

Looks very clean, thanks!

> 
> -static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
> -			      int length, int *pages_used,
> -			      unsigned int *size_used,
> -			      struct scatterlist *to, int to_max_pages,
> -			      bool charge)
> -{

...

> -			err = zerocopy_from_iter(sk, out_iov, data_len, &pages,
> -						 chunk, &sgout[1],
> -						 (n_sgout - 1), false);
> +			err = tls_setup_from_iter(sk, out_iov, data_len,
> +						  &pages, chunk, &sgout[1],
> +						  (n_sgout - 1));

Any reason not to add the 'bool charge' to sk_msg_zerocopy_from_iter?
Then tls_setup_from_iter is not necessary.

^ permalink raw reply

* Re: [PATCH net] rxrpc: Fix incorrect conditional on IPV6
From: David Howells @ 2018-10-12 20:15 UTC (permalink / raw)
  To: David Miller; +Cc: dhowells, eric.dumazet, arnd, netdev, linux-afs
In-Reply-To: <20181012.111130.2147603962642962519.davem@davemloft.net>

David Miller <davem@davemloft.net> wrote:

> > Nit : Correct attribution would require a Reported-by: tag
> 
> Right.

And I've posted a new version with that and the reviewed-by from Arnd.

David

^ permalink raw reply

* Re: [PATCH v4] Wait for running BPF programs when updating map-in-map
From: Alexei Starovoitov @ 2018-10-13  2:31 UTC (permalink / raw)
  To: Daniel Colascione
  Cc: joelaf, linux-kernel, timmurray, netdev, Lorenzo Colitti,
	Chenbo Feng, Mathieu Desnoyers, Alexei Starovoitov,
	Daniel Borkmann
In-Reply-To: <20181012105427.243779-1-dancol@google.com>

On Fri, Oct 12, 2018 at 03:54:27AM -0700, Daniel Colascione wrote:
> The map-in-map frequently serves as a mechanism for atomic
> snapshotting of state that a BPF program might record.  The current
> implementation is dangerous to use in this way, however, since
> userspace has no way of knowing when all programs that might have
> retrieved the "old" value of the map may have completed.
> 
> This change ensures that map update operations on map-in-map map types
> always wait for all references to the old map to drop before returning
> to userspace.
> 
> Signed-off-by: Daniel Colascione <dancol@google.com>
> ---
>  kernel/bpf/syscall.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 8339d81cba1d..d7c16ae1e85a 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -741,6 +741,18 @@ static int map_lookup_elem(union bpf_attr *attr)
>  	return err;
>  }
>  
> +static void maybe_wait_bpf_programs(struct bpf_map *map)
> +{
> +	/* Wait for any running BPF programs to complete so that
> +	 * userspace, when we return to it, knows that all programs
> +	 * that could be running use the new map value.
> +	 */
> +	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
> +	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
> +		synchronize_rcu();
> +	}

extra {} were not necessary. I removed them while applying to bpf-next.
Please run checkpatch.pl next time.
Thanks

^ permalink raw reply

* [PATCH bpf-next 10/13] tools/bpf: do not use pahole if clang/llvm can generate BTF sections
From: Yonghong Song @ 2018-10-12 18:55 UTC (permalink / raw)
  To: ast, kafai, daniel, netdev; +Cc: kernel-team

Add additional checks in tools/testing/selftests/bpf and
samples/bpf so that pahole is not used when the clang/llvm
compiler can generate BTF sections itself.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 samples/bpf/Makefile                 | 8 ++++++++
 tools/testing/selftests/bpf/Makefile | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index be0a961450bc..870fe7ee2b69 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -208,12 +208,20 @@ endif
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+			  clang -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+			  readelf -S ./llvm_btf_verify.o | grep BTF; \
+			  /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+	EXTRA_CFLAGS += -g
+else
 ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
 	EXTRA_CFLAGS += -g
 	LLC_FLAGS += -mattr=dwarfris
 	DWARF2BTF = y
 endif
+endif
 
 # Trick to allow make to be run from this directory
 all:
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index d24afe8b821d..83240e54c6a8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -119,7 +119,14 @@ $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+			  clang -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \
+			  readelf -S ./llvm_btf_verify.o | grep BTF; \
+			  /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+	CLANG_FLAGS += -g
+else
 ifneq ($(BTF_LLC_PROBE),)
 ifneq ($(BTF_PAHOLE_PROBE),)
 ifneq ($(BTF_OBJCOPY_PROBE),)
@@ -129,6 +136,7 @@ ifneq ($(BTF_OBJCOPY_PROBE),)
 endif
 endif
 endif
+endif
 
 $(OUTPUT)/%.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
-- 
2.17.1

^ permalink raw reply related

* [PATCH bpf-next 11/13] tools/bpf: refactor to implement btf_get_from_id() in lib/bpf
From: Yonghong Song @ 2018-10-12 18:55 UTC (permalink / raw)
  To: ast, kafai, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20181012185526.2380717-1-yhs@fb.com>

The function get_btf() is implemented in tools/bpf/bpftool/map.c
to get a btf structure given a map_info. This patch
refactors it into the function btf_get_from_id()
in tools/lib/bpf so that it can be reused later.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/bpf/bpftool/map.c | 68 ++--------------------------------------
 tools/lib/bpf/btf.c     | 69 +++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h     |  1 +
 3 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 9f5de48f8a99..33c7dc8ddd86 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -203,70 +203,6 @@ static int do_dump_btf(const struct btf_dumper *d,
 	return ret;
 }
 
-static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
-{
-	struct bpf_btf_info btf_info = { 0 };
-	__u32 len = sizeof(btf_info);
-	__u32 last_size;
-	int btf_fd;
-	void *ptr;
-	int err;
-
-	err = 0;
-	*btf = NULL;
-	btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
-	if (btf_fd < 0)
-		return 0;
-
-	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
-	 * let's start with a sane default - 4KiB here - and resize it only if
-	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
-	 */
-	btf_info.btf_size = 4096;
-	last_size = btf_info.btf_size;
-	ptr = malloc(last_size);
-	if (!ptr) {
-		err = -ENOMEM;
-		goto exit_free;
-	}
-
-	bzero(ptr, last_size);
-	btf_info.btf = ptr_to_u64(ptr);
-	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-
-	if (!err && btf_info.btf_size > last_size) {
-		void *temp_ptr;
-
-		last_size = btf_info.btf_size;
-		temp_ptr = realloc(ptr, last_size);
-		if (!temp_ptr) {
-			err = -ENOMEM;
-			goto exit_free;
-		}
-		ptr = temp_ptr;
-		bzero(ptr, last_size);
-		btf_info.btf = ptr_to_u64(ptr);
-		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-	}
-
-	if (err || btf_info.btf_size > last_size) {
-		err = errno;
-		goto exit_free;
-	}
-
-	*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
-	if (IS_ERR(*btf)) {
-		err = PTR_ERR(*btf);
-		*btf = NULL;
-	}
-
-exit_free:
-	close(btf_fd);
-	free(ptr);
-
-	return err;
-}
-
 static json_writer_t *get_btf_writer(void)
 {
 	json_writer_t *jw = jsonw_new(stdout);
@@ -753,7 +689,7 @@ static int do_dump(int argc, char **argv)
 
 	prev_key = NULL;
 
-	err = get_btf(&info, &btf);
+	err = btf_get_from_id(info.btf_id, &btf);
 	if (err) {
 		p_err("failed to get btf");
 		goto exit_free;
@@ -897,7 +833,7 @@ static int do_lookup(int argc, char **argv)
 	}
 
 	/* here means bpf_map_lookup_elem() succeeded */
-	err = get_btf(&info, &btf);
+	err = btf_get_from_id(info.btf_id, &btf);
 	if (err) {
 		p_err("failed to get btf");
 		goto exit_free;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 4748e0bacd2b..ab654628e966 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -42,6 +42,11 @@ struct btf_ext {
 	__u32 func_info_len;
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
 	if (btf->types_size - btf->nr_types < 2) {
@@ -403,6 +408,70 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 		return NULL;
 }
 
+int btf_get_from_id(__u32 id, struct btf **btf)
+{
+	struct bpf_btf_info btf_info = { 0 };
+	__u32 len = sizeof(btf_info);
+	__u32 last_size;
+	int btf_fd;
+	void *ptr;
+	int err;
+
+	err = 0;
+	*btf = NULL;
+	btf_fd = bpf_btf_get_fd_by_id(id);
+	if (btf_fd < 0)
+		return 0;
+
+	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+	 * let's start with a sane default - 4KiB here - and resize it only if
+	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
+	 */
+	btf_info.btf_size = 4096;
+	last_size = btf_info.btf_size;
+	ptr = malloc(last_size);
+	if (!ptr) {
+		err = -ENOMEM;
+		goto exit_free;
+	}
+
+	bzero(ptr, last_size);
+	btf_info.btf = ptr_to_u64(ptr);
+	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+	if (!err && btf_info.btf_size > last_size) {
+		void *temp_ptr;
+
+		last_size = btf_info.btf_size;
+		temp_ptr = realloc(ptr, last_size);
+		if (!temp_ptr) {
+			err = -ENOMEM;
+			goto exit_free;
+		}
+		ptr = temp_ptr;
+		bzero(ptr, last_size);
+		btf_info.btf = ptr_to_u64(ptr);
+		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+	}
+
+	if (err || btf_info.btf_size > last_size) {
+		err = errno;
+		goto exit_free;
+	}
+
+	*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
+	if (IS_ERR(*btf)) {
+		err = PTR_ERR(*btf);
+		*btf = NULL;
+	}
+
+exit_free:
+	close(btf_fd);
+	free(ptr);
+
+	return err;
+}
+
 static int btf_ext_validate_func_info(const struct btf_sec_func_info *sinfo,
 				      __u32 size, btf_print_fn_t err_log)
 {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 2bbcc9e41cf5..51e63da844e6 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -44,6 +44,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 int btf__resolve_type(const struct btf *btf, __u32 type_id);
 int btf__fd(const struct btf *btf);
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+int btf_get_from_id(__u32 id, struct btf **btf);
 
 struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
 void btf_ext__free(struct btf_ext *btf_ext);
-- 
2.17.1

^ permalink raw reply related

* [PATCH bpf-next 13/13] tools/bpf: bpftool: add support for jited func types
From: Yonghong Song @ 2018-10-12 18:55 UTC (permalink / raw)
  To: ast, kafai, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20181012185526.2380717-1-yhs@fb.com>

This patch adds support for printing the function signature
if btf func_info is available. Note that ksym
now uses the function name instead of prog_name, as
prog_name has a limit of 16 bytes including the
terminating '\0'.

The following is a sample output for selftests
test_btf with file test_btf_haskv.o:

  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * ):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
     0:   push   %rbp
     1:   mov    %rsp,%rbp
    ......
    3c:   add    $0x28,%rbp
    40:   leaveq
    41:   retq

  int test_long_fname_1(struct dummy_tracepoint_args * ):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
     0:   push   %rbp
     1:   mov    %rsp,%rbp
    ......
    3a:   add    $0x28,%rbp
    3e:   leaveq
    3f:   retq

  int test_long_fname_2(struct dummy_tracepoint_args * ):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
     0:   push   %rbp
     1:   mov    %rsp,%rbp
    ......
    80:   add    $0x28,%rbp
    84:   leaveq
    85:   retq

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/bpf/bpftool/btf_dumper.c | 96 ++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h       |  2 +
 tools/bpf/bpftool/prog.c       | 54 +++++++++++++++++++
 3 files changed, 152 insertions(+)

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 55bc512a1831..a31df4202335 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -249,3 +249,99 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
 {
 	return btf_dumper_do_type(d, type_id, 0, data);
 }
+
+#define BTF_PRINT_STRING(str)						\
+	{								\
+		pos += snprintf(func_sig + pos, size - pos, str);	\
+		if (pos >= size)					\
+			return -1;					\
+	}
+#define BTF_PRINT_ONE_ARG(fmt, arg)					\
+	{								\
+		pos += snprintf(func_sig + pos, size - pos, fmt, arg);	\
+		if (pos >= size)					\
+			return -1;					\
+	}
+#define BTF_PRINT_TYPE_ONLY(type)					\
+	{								\
+		pos = __btf_dumper_type_only(btf, type, func_sig,	\
+					     pos, size);		\
+		if (pos == -1)						\
+			return -1;					\
+	}
+
+static int __btf_dumper_type_only(struct btf *btf, __u32 type_id,
+				  char *func_sig, int pos, int size)
+{
+	const struct btf_type *t = btf__type_by_id(btf, type_id);
+	const struct btf_array *array;
+	int i, vlen;
+
+	switch (BTF_INFO_KIND(t->info)) {
+	case BTF_KIND_INT:
+		BTF_PRINT_ONE_ARG("%s ",
+				  btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_STRUCT:
+		BTF_PRINT_ONE_ARG("struct %s ",
+				  btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_UNION:
+		BTF_PRINT_ONE_ARG("union %s ",
+				  btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_ENUM:
+		BTF_PRINT_ONE_ARG("enum %s ",
+				  btf__name_by_offset(btf, t->name_off));
+		break;
+	case BTF_KIND_ARRAY:
+		array = (struct btf_array *)(t + 1);
+		BTF_PRINT_TYPE_ONLY(array->type);
+		BTF_PRINT_ONE_ARG("[%d]", array->nelems);
+		break;
+	case BTF_KIND_PTR:
+		BTF_PRINT_TYPE_ONLY(t->type);
+		BTF_PRINT_STRING("* ");
+		break;
+	case BTF_KIND_UNKN:
+	case BTF_KIND_FWD:
+	case BTF_KIND_TYPEDEF:
+		return -1;
+	case BTF_KIND_VOLATILE:
+		BTF_PRINT_STRING("volatile ");
+		BTF_PRINT_TYPE_ONLY(t->type);
+		break;
+	case BTF_KIND_CONST:
+		BTF_PRINT_STRING("const ");
+		BTF_PRINT_TYPE_ONLY(t->type);
+		break;
+	case BTF_KIND_RESTRICT:
+		BTF_PRINT_STRING("restrict ");
+		BTF_PRINT_TYPE_ONLY(t->type);
+		break;
+	case BTF_KIND_FUNC:
+	case BTF_KIND_FUNC_PROTO:
+		BTF_PRINT_TYPE_ONLY(t->type);
+		BTF_PRINT_ONE_ARG("%s(", btf__name_by_offset(btf, t->name_off));
+		vlen = BTF_INFO_VLEN(t->info);
+		for (i = 0; i < vlen; i++) {
+			__u32 arg_type = ((__u32 *)(t + 1))[i];
+
+			BTF_PRINT_TYPE_ONLY(arg_type);
+			if (i != (vlen - 1))
+				BTF_PRINT_STRING(", ");
+		}
+		BTF_PRINT_STRING(")");
+		break;
+	default:
+		return -1;
+	}
+
+	return pos;
+}
+
+int btf_dumper_type_only(struct btf *btf, __u32 type_id, char *func_sig,
+			 int size)
+{
+	return __btf_dumper_type_only(btf, type_id, func_sig, 0, size);
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 40492cdc4e53..d3966cf2ea3e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -166,6 +166,8 @@ struct btf_dumper {
  */
 int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
 		    const void *data);
+int btf_dumper_type_only(struct btf *btf, __u32 func_type_id, char *func_only,
+			 int size);
 
 struct nlattr;
 struct ifinfomsg;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index b1cd3bc8db70..eb9b4bde614c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -47,6 +47,7 @@
 #include <linux/err.h>
 
 #include <bpf.h>
+#include <btf.h>
 #include <libbpf.h>
 
 #include "cfg.h"
@@ -427,9 +428,11 @@ static int do_show(int argc, char **argv)
 static int do_dump(int argc, char **argv)
 {
 	unsigned long *func_ksyms = NULL;
+	unsigned int *func_types = NULL;
 	struct bpf_prog_info info = {};
 	unsigned int *func_lens = NULL;
 	unsigned int nr_func_ksyms;
+	unsigned int nr_func_types;
 	unsigned int nr_func_lens;
 	struct dump_data dd = {};
 	__u32 len = sizeof(info);
@@ -526,6 +529,16 @@ static int do_dump(int argc, char **argv)
 		}
 	}
 
+	nr_func_types = info.nr_jited_func_types;
+	if (nr_func_types) {
+		func_types = malloc(nr_func_types * sizeof(__u32));
+		if (!func_types) {
+			p_err("mem alloc failed");
+			close(fd);
+			goto err_free;
+		}
+	}
+
 	memset(&info, 0, sizeof(info));
 
 	*member_ptr = ptr_to_u64(buf);
@@ -534,6 +547,8 @@ static int do_dump(int argc, char **argv)
 	info.nr_jited_ksyms = nr_func_ksyms;
 	info.jited_func_lens = ptr_to_u64(func_lens);
 	info.nr_jited_func_lens = nr_func_lens;
+	info.jited_func_types = ptr_to_u64(func_types);
+	info.nr_jited_func_types = nr_func_types;
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	close(fd);
@@ -557,6 +572,11 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.nr_jited_func_types > nr_func_types) {
+		p_err("too many types returned");
+		goto err_free;
+	}
+
 	if ((member_len == &info.jited_prog_len &&
 	     info.jited_prog_insns == 0) ||
 	    (member_len == &info.xlated_prog_len &&
@@ -565,6 +585,12 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.btf_id &&
+	    info.nr_jited_func_lens != info.nr_jited_func_types) {
+		p_err("unequal jited func lens and types");
+		goto err_free;
+	}
+
 	if (filepath) {
 		fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600);
 		if (fd < 0) {
@@ -598,7 +624,9 @@ static int do_dump(int argc, char **argv)
 			struct kernel_sym *sym = NULL;
 			char sym_name[SYM_MAX_NAME];
 			unsigned char *img = buf;
+			struct btf *btf = NULL;
 			__u64 *ksyms = NULL;
+			char func_sig[1024];
 			__u32 *lens;
 			__u32 i;
 
@@ -607,6 +635,14 @@ static int do_dump(int argc, char **argv)
 				ksyms = (__u64 *) info.jited_ksyms;
 			}
 
+			if (info.btf_id) {
+				err = btf_get_from_id(info.btf_id, &btf);
+				if (err) {
+					p_err("failed to get btf");
+					goto err_free;
+				}
+			}
+
 			if (json_output)
 				jsonw_start_array(json_wtr);
 
@@ -622,12 +658,28 @@ static int do_dump(int argc, char **argv)
 					strcpy(sym_name, "unknown");
 				}
 
+				func_sig[0] = '\0';
+				if (btf) {
+					err = btf_dumper_type_only(btf,
+								   func_types[i],
+								   func_sig,
+								   sizeof(func_sig));
+					if (err < 0)
+						func_sig[0] = '\0';
+				}
+
 				if (json_output) {
 					jsonw_start_object(json_wtr);
+					if (func_sig[0] != '\0') {
+						jsonw_name(json_wtr, "proto");
+						jsonw_string(json_wtr, func_sig);
+					}
 					jsonw_name(json_wtr, "name");
 					jsonw_string(json_wtr, sym_name);
 					jsonw_name(json_wtr, "insns");
 				} else {
+					if (func_sig[0] != '\0')
+						printf("%s:\n", func_sig);
 					printf("%s:\n", sym_name);
 				}
 
@@ -665,12 +717,14 @@ static int do_dump(int argc, char **argv)
 	free(buf);
 	free(func_ksyms);
 	free(func_lens);
+	free(func_types);
 	return 0;
 
 err_free:
 	free(buf);
 	free(func_ksyms);
 	free(func_lens);
+	free(func_types);
 	return -1;
 }
 
-- 
2.17.1

^ permalink raw reply related

* [PATCH bpf-next 12/13] tools/bpf: enhance test_btf file testing to test func info
From: Yonghong Song @ 2018-10-12 18:55 UTC (permalink / raw)
  To: ast, kafai, daniel, netdev; +Cc: kernel-team
In-Reply-To: <20181012185526.2380717-1-yhs@fb.com>

Change the bpf programs test_btf_haskv.c and test_btf_nokv.c to
have two sections, and enhance test_btf.c test_file feature
to test btf func_info returned by the kernel.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/testing/selftests/bpf/test_btf.c       | 72 +++++++++++++++++++-
 tools/testing/selftests/bpf/test_btf_haskv.c | 16 ++++-
 tools/testing/selftests/bpf/test_btf_nokv.c  | 16 ++++-
 3 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index e03a8cea4bb7..0bbefb571426 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -2235,10 +2235,16 @@ static int file_has_btf_elf(const char *fn)
 static int do_test_file(unsigned int test_num)
 {
 	const struct btf_file_test *test = &file_tests[test_num - 1];
+	const char *expected_fnames[] = {"_dummy_tracepoint",
+					 "test_long_fname_1",
+					 "test_long_fname_2"};
+	__u32 func_lens[10], func_types[10], info_len;
+	struct bpf_prog_info info = {};
 	struct bpf_object *obj = NULL;
 	struct bpf_program *prog;
+	struct btf *btf = NULL;
 	struct bpf_map *map;
-	int err;
+	int i, err, prog_fd;
 
 	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
 		test->file);
@@ -2271,6 +2277,7 @@ static int do_test_file(unsigned int test_num)
 	err = bpf_object__load(obj);
 	if (CHECK(err < 0, "bpf_object__load: %d", err))
 		goto done;
+	prog_fd = bpf_program__fd(prog);
 
 	map = bpf_object__find_map_by_name(obj, "btf_map");
 	if (CHECK(!map, "btf_map not found")) {
@@ -2285,6 +2292,69 @@ static int do_test_file(unsigned int test_num)
 		  test->btf_kv_notfound))
 		goto done;
 
+	if (!jit_enabled)
+		goto skip_jit;
+
+	info_len = sizeof(struct bpf_prog_info);
+	info.nr_jited_func_types = ARRAY_SIZE(func_types);
+	info.nr_jited_func_lens = ARRAY_SIZE(func_lens);
+	info.jited_func_types = ptr_to_u64(&func_types[0]);
+	info.jited_func_lens = ptr_to_u64(&func_lens[0]);
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+
+	if (CHECK(err == -1, "invalid get info errno:%d", errno)) {
+		fprintf(stderr, "%s\n", btf_log_buf);
+		err = -1;
+		goto done;
+	}
+	if (CHECK(info.nr_jited_func_lens != 3,
+		  "incorrect info.nr_jited_func_lens %d",
+		  info.nr_jited_func_lens)) {
+		err = -1;
+		goto done;
+	}
+	if (CHECK(info.nr_jited_func_types != 3,
+		  "incorrect info.nr_jited_func_types %d",
+		  info.nr_jited_func_types)) {
+		err = -1;
+		goto done;
+	}
+	if (CHECK(info.btf_id == 0, "incorrect btf_id = 0")) {
+		err = -1;
+		goto done;
+	}
+
+	err = btf_get_from_id(info.btf_id, &btf);
+	if (CHECK(err, "cannot get btf from kernel, err: %d", err))
+		goto done;
+
+	/* check three functions */
+	for (i = 0; i < 3; i++) {
+		const struct btf_type *t;
+		const char *fname;
+
+		t = btf__type_by_id(btf, func_types[i]);
+		if (CHECK(!t, "btf__type_by_id failure: id %u",
+			  func_types[i])) {
+			err = -1;
+			goto done;
+		}
+
+		fname = btf__name_by_offset(btf, t->name_off);
+		err = strcmp(fname, expected_fnames[i]);
+		/* for the second and third functions in .text section,
+		 * the compiler may order them either way.
+		 */
+		if (i && err)
+			err = strcmp(fname, expected_fnames[3 - i]);
+		if (CHECK(err, "incorrect fname %s", fname ? : "")) {
+			err = -1;
+			goto done;
+		}
+	}
+
+skip_jit:
 	fprintf(stderr, "OK");
 
 done:
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
index b21b876f475d..e5c79fe0ffdb 100644
--- a/tools/testing/selftests/bpf/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -24,8 +24,8 @@ struct dummy_tracepoint_args {
 	struct sock *sock;
 };
 
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
 {
 	struct ipv_counts *counts;
 	int key = 0;
@@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
 	return 0;
 }
 
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_2(arg);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_1(arg);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
index 0ed8e088eebf..434188c37774 100644
--- a/tools/testing/selftests/bpf/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -22,8 +22,8 @@ struct dummy_tracepoint_args {
 	struct sock *sock;
 };
 
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
 {
 	struct ipv_counts *counts;
 	int key = 0;
@@ -40,4 +40,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
 	return 0;
 }
 
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_2(arg);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_1(arg);
+}
+
 char _license[] SEC("license") = "GPL";
-- 
2.17.1

^ permalink raw reply related

* [iproute2 PATCH] tc: flower: Classify packets based on port ranges
From: Amritha Nambiar @ 2018-10-12 13:54 UTC (permalink / raw)
  To: stephen, netdev
  Cc: jakub.kicinski, amritha.nambiar, sridhar.samudrala, jhs,
	xiyou.wangcong, jiri

Added support for filtering based on port ranges.

Example:
1. Match on a port range:
-------------------------
$ tc filter add dev enp4s0 protocol ip parent ffff:\
  prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\
  action drop

$ tc -s filter show dev enp4s0 parent ffff:
filter protocol ip pref 1 flower chain 0
filter protocol ip pref 1 flower chain 0 handle 0x1
  eth_type ipv4
  ip_proto tcp
  dst_port_min 20
  dst_port_max 30
  skip_hw
  not_in_hw
        action order 1: gact action drop
         random type none pass val 0
         index 1 ref 1 bind 1 installed 181 sec used 5 sec
        Action statistics:
        Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0)
        backlog 0b 0p requeues 0

2. Match on IP address and port range:
--------------------------------------
$ tc filter add dev enp4s0 protocol ip parent ffff:\
  prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\
  skip_hw action drop

$ tc -s filter show dev enp4s0 parent ffff:
filter protocol ip pref 1 flower chain 0 handle 0x2
  eth_type ipv4
  ip_proto tcp
  dst_ip 192.168.1.1
  dst_port_min 100
  dst_port_max 200
  skip_hw
  not_in_hw
        action order 1: gact action drop
         random type none pass val 0
         index 2 ref 1 bind 1 installed 28 sec used 6 sec
        Action statistics:
        Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0)
        backlog 0b 0p requeues 0

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 include/uapi/linux/pkt_cls.h |    5 +
 tc/f_flower.c                |  145 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 140 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index be382fb..3d9727f 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -405,6 +405,11 @@ enum {
 	TCA_FLOWER_KEY_UDP_SRC,		/* be16 */
 	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
 
+	TCA_FLOWER_KEY_PORT_SRC_MIN,	/* be16 */
+	TCA_FLOWER_KEY_PORT_SRC_MAX,	/* be16 */
+	TCA_FLOWER_KEY_PORT_DST_MIN,	/* be16 */
+	TCA_FLOWER_KEY_PORT_DST_MAX,	/* be16 */
+
 	TCA_FLOWER_FLAGS,
 	TCA_FLOWER_KEY_VLAN_ID,		/* be16 */
 	TCA_FLOWER_KEY_VLAN_PRIO,	/* u8   */
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 59e5f57..1a7bc80 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -33,6 +33,11 @@ enum flower_endpoint {
 	FLOWER_ENDPOINT_DST
 };
 
+struct range_type {
+	__be16 min_port_type;
+	__be16 max_port_type;
+};
+
 enum flower_icmp_field {
 	FLOWER_ICMP_FIELD_TYPE,
 	FLOWER_ICMP_FIELD_CODE
@@ -493,6 +498,64 @@ static int flower_parse_port(char *str, __u8 ip_proto,
 	return 0;
 }
 
+static int flower_port_range_attr_type(__u8 ip_proto, enum flower_endpoint type,
+				       struct range_type *range)
+{
+	if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP ||
+	    ip_proto == IPPROTO_SCTP) {
+		if (type == FLOWER_ENDPOINT_SRC) {
+			range->min_port_type = TCA_FLOWER_KEY_PORT_SRC_MIN;
+			range->max_port_type = TCA_FLOWER_KEY_PORT_SRC_MAX;
+		} else {
+			range->min_port_type = TCA_FLOWER_KEY_PORT_DST_MIN;
+			range->max_port_type = TCA_FLOWER_KEY_PORT_DST_MAX;
+		}
+	} else {
+		return -1;
+	}
+
+	return 0;
+}
+
+static int flower_parse_port_range(__be16 *min, __be16 *max, __u8 ip_proto,
+				   enum flower_endpoint endpoint,
+				   struct nlmsghdr *n)
+{
+	struct range_type range;
+
+	flower_port_range_attr_type(ip_proto, endpoint, &range);
+	addattr16(n, MAX_MSG, range.min_port_type, *min);
+	addattr16(n, MAX_MSG, range.max_port_type, *max);
+
+	return 0;
+}
+
+static int get_range(__be16 *min, __be16 *max, char *argv)
+{
+	char *r;
+
+	r = strchr(argv, '-');
+	if (r) {
+		*r = '\0';
+		if (get_be16(min, argv, 10)) {
+			fprintf(stderr, "invalid min range\n");
+			return -1;
+		}
+		if (get_be16(max, r + 1, 10)) {
+			fprintf(stderr, "invalid max range\n");
+			return -1;
+		}
+		if (htons(*max) <= htons(*min)) {
+			fprintf(stderr, "max value should be greater than min value\n");
+			return -1;
+		}
+	} else {
+		fprintf(stderr, "Illegal range format\n");
+		return -1;
+	}
+	return 0;
+}
+
 #define TCP_FLAGS_MAX_MASK 0xfff
 
 static int flower_parse_tcp_flags(char *str, int flags_type, int mask_type,
@@ -887,20 +950,54 @@ static int flower_parse_opt(struct filter_util *qu, char *handle,
 				return -1;
 			}
 		} else if (matches(*argv, "dst_port") == 0) {
+			__be16 min, max;
+
 			NEXT_ARG();
-			ret = flower_parse_port(*argv, ip_proto,
-						FLOWER_ENDPOINT_DST, n);
-			if (ret < 0) {
-				fprintf(stderr, "Illegal \"dst_port\"\n");
-				return -1;
+			if (matches(*argv, "range") == 0) {
+				NEXT_ARG();
+				ret = get_range(&min, &max, *argv);
+				if (ret < 0)
+					return -1;
+				ret = flower_parse_port_range(&min, &max,
+							      ip_proto,
+							      FLOWER_ENDPOINT_DST,
+							      n);
+				if (ret < 0) {
+					fprintf(stderr, "Illegal \"dst_port range\"\n");
+					return -1;
+				}
+			} else {
+				ret = flower_parse_port(*argv, ip_proto,
+							FLOWER_ENDPOINT_DST, n);
+				if (ret < 0) {
+					fprintf(stderr, "Illegal \"dst_port\"\n");
+					return -1;
+				}
 			}
 		} else if (matches(*argv, "src_port") == 0) {
+			__be16 min, max;
+
 			NEXT_ARG();
-			ret = flower_parse_port(*argv, ip_proto,
-						FLOWER_ENDPOINT_SRC, n);
-			if (ret < 0) {
-				fprintf(stderr, "Illegal \"src_port\"\n");
-				return -1;
+			if (matches(*argv, "range") == 0) {
+				NEXT_ARG();
+				ret = get_range(&min, &max, *argv);
+				if (ret < 0)
+					return -1;
+				ret = flower_parse_port_range(&min, &max,
+							      ip_proto,
+							      FLOWER_ENDPOINT_SRC,
+							      n);
+				if (ret < 0) {
+					fprintf(stderr, "Illegal \"src_port range\"\n");
+					return -1;
+				}
+			} else {
+				ret = flower_parse_port(*argv, ip_proto,
+							FLOWER_ENDPOINT_SRC, n);
+				if (ret < 0) {
+					fprintf(stderr, "Illegal \"src_port\"\n");
+					return -1;
+				}
 			}
 		} else if (matches(*argv, "tcp_flags") == 0) {
 			NEXT_ARG();
@@ -1309,6 +1406,17 @@ static void flower_print_port(char *name, struct rtattr *attr)
 	print_hu(PRINT_ANY, name, namefrm, rta_getattr_be16(attr));
 }
 
+static void flower_print_port_range(char *name, struct rtattr *attr)
+{
+	SPRINT_BUF(namefrm);
+
+	if (!attr)
+		return;
+
+	sprintf(namefrm, "\n  %s %%u", name);
+	print_uint(PRINT_ANY, name, namefrm, rta_getattr_be16(attr));
+}
+
 static void flower_print_tcp_flags(const char *name, struct rtattr *flags_attr,
 				   struct rtattr *mask_attr)
 {
@@ -1398,6 +1506,7 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
 			    struct rtattr *opt, __u32 handle)
 {
 	struct rtattr *tb[TCA_FLOWER_MAX + 1];
+	struct range_type range;
 	int nl_type, nl_mask_type;
 	__be16 eth_type = 0;
 	__u8 ip_proto = 0xff;
@@ -1516,6 +1625,22 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
 	if (nl_type >= 0)
 		flower_print_port("src_port", tb[nl_type]);
 
+	if (flower_port_range_attr_type(ip_proto, FLOWER_ENDPOINT_DST, &range)
+	    == 0) {
+		flower_print_port_range("dst_port_min",
+					tb[range.min_port_type]);
+		flower_print_port_range("dst_port_max",
+					tb[range.max_port_type]);
+	}
+
+	if (flower_port_range_attr_type(ip_proto, FLOWER_ENDPOINT_SRC, &range)
+	    == 0) {
+		flower_print_port_range("src_port_min",
+					tb[range.min_port_type]);
+		flower_print_port_range("src_port_max",
+					tb[range.max_port_type]);
+	}
+
 	flower_print_tcp_flags("tcp_flags", tb[TCA_FLOWER_KEY_TCP_FLAGS],
 			       tb[TCA_FLOWER_KEY_TCP_FLAGS_MASK]);
 

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.