netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] net: default_rps_mask follow-up
@ 2023-02-15 18:33 Paolo Abeni
  2023-02-15 18:33 ` [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute Paolo Abeni
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Paolo Abeni @ 2023-02-15 18:33 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Jakub Kicinski, Eric Dumazet, Shuah Khan

The first patch namespacify the setting: once proper isolation
is in place in the main namespace, additional demux in the child
namespaces will be redundant.
The 2nd patch adds more self-tests coverage.

Paolo Abeni (2):
  net: make default_rps_mask a per netns attribute
  self-tests: more rps self tests

 include/linux/netdevice.h                     |  1 -
 include/net/netns/core.h                      |  5 ++
 net/core/net-sysfs.c                          | 23 ++++++---
 net/core/sysctl_net_core.c                    | 50 ++++++++++++++-----
 .../testing/selftests/net/rps_default_mask.sh | 41 ++++++++++-----
 5 files changed, 87 insertions(+), 33 deletions(-)

-- 
2.39.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute
  2023-02-15 18:33 [PATCH net-next 0/2] net: default_rps_mask follow-up Paolo Abeni
@ 2023-02-15 18:33 ` Paolo Abeni
  2023-02-16 13:06   ` kernel test robot
  2023-02-15 18:33 ` [PATCH net-next 2/2] self-tests: more rps self tests Paolo Abeni
  2023-02-15 19:29 ` [PATCH net-next 0/2] net: default_rps_mask follow-up Jakub Kicinski
  2 siblings, 1 reply; 7+ messages in thread
From: Paolo Abeni @ 2023-02-15 18:33 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Jakub Kicinski, Eric Dumazet, Shuah Khan

That really was meant to be a per netns attribute from the beginning.

The idea is that once proper isolation is in place in the main
namespace, additional demux in the child namespaces will be redundant.
Let's make child netns default rps mask empty by default.

To avoid bloating the netns with a possibly large cpumask, allocate
it on-demand during the first write operation.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/netdevice.h  |  1 -
 include/net/netns/core.h   |  5 ++++
 net/core/net-sysfs.c       | 23 ++++++++++++------
 net/core/sysctl_net_core.c | 50 ++++++++++++++++++++++++++++----------
 4 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d9cdbc047b49..409300524822 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -224,7 +224,6 @@ struct net_device_core_stats {
 #include <linux/static_key.h>
 extern struct static_key_false rps_needed;
 extern struct static_key_false rfs_needed;
-extern struct cpumask rps_default_mask;
 #endif
 
 struct neighbour;
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 8249060cf5d0..a91ef9f8de60 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -6,6 +6,7 @@
 
 struct ctl_table_header;
 struct prot_inuse;
+struct cpumask;
 
 struct netns_core {
 	/* core sysctls */
@@ -17,6 +18,10 @@ struct netns_core {
 #ifdef CONFIG_PROC_FS
 	struct prot_inuse __percpu *prot_inuse;
 #endif
+
+#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
+	struct cpumask *rps_default_mask;
+#endif
 };
 
 #endif
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e20784b6f873..15e3f4606b5f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1060,6 +1060,18 @@ static const struct kobj_type rx_queue_ktype = {
 	.get_ownership = rx_queue_get_ownership,
 };
 
+static int rx_queue_default_mask(struct net_device *dev,
+				 struct netdev_rx_queue *queue)
+{
+#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
+	struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);
+
+	if (rps_default_mask && !cpumask_empty(rps_default_mask))
+		return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
+#endif
+	return 0;
+}
+
 static int rx_queue_add_kobject(struct net_device *dev, int index)
 {
 	struct netdev_rx_queue *queue = dev->_rx + index;
@@ -1083,13 +1095,10 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
 			goto err;
 	}
 
-#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
-	if (!cpumask_empty(&rps_default_mask)) {
-		error = netdev_rx_queue_set_rps_mask(queue, &rps_default_mask);
-		if (error)
-			goto err;
-	}
-#endif
+	error = rx_queue_default_mask(dev, queue);
+	if (error)
+		goto err;
+
 	kobject_uevent(kobj, KOBJ_ADD);
 
 	return error;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7130e6d9e263..e9848f35495d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -74,24 +74,46 @@ static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
 #endif
 
 #ifdef CONFIG_RPS
-struct cpumask rps_default_mask;
+
+static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
+{
+	struct cpumask *rps_default_mask;
+
+	if (net->core.rps_default_mask)
+		return net->core.rps_default_mask;
+
+	if (!zalloc_cpumask_var(&rps_default_mask, GFP_KERNEL))
+		return NULL;
+
+	/* pairs with READ_ONCE in rx_queue_default_mask() */
+	WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
+	return rps_default_mask;
+}
 
 static int rps_default_mask_sysctl(struct ctl_table *table, int write,
 				   void *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct net *net = (struct net *)table->data;
 	int err = 0;
 
 	rtnl_lock();
 	if (write) {
-		err = cpumask_parse(buffer, &rps_default_mask);
+		struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);
+
+		err = -ENOMEM;
+		if (!rps_default_mask)
+			goto done;
+
+		err = cpumask_parse(buffer, rps_default_mask);
 		if (err)
 			goto done;
 
-		err = rps_cpumask_housekeeping(&rps_default_mask);
+		err = rps_cpumask_housekeeping(rps_default_mask);
 		if (err)
 			goto done;
 	} else {
-		dump_cpumask(buffer, lenp, ppos, &rps_default_mask);
+		dump_cpumask(buffer, lenp, ppos,
+			     net->core.rps_default_mask ? : cpu_none_mask);
 	}
 
 done:
@@ -508,11 +530,6 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= rps_sock_flow_sysctl
 	},
-	{
-		.procname	= "rps_default_mask",
-		.mode		= 0644,
-		.proc_handler	= rps_default_mask_sysctl
-	},
 #endif
 #ifdef CONFIG_NET_FLOW_LIMIT
 	{
@@ -639,6 +656,14 @@ static struct ctl_table net_core_table[] = {
 };
 
 static struct ctl_table netns_core_table[] = {
+#if IS_ENABLED(CONFIG_RPS)
+	{
+		.procname	= "rps_default_mask",
+		.data		= &init_net,
+		.mode		= 0644,
+		.proc_handler	= rps_default_mask_sysctl
+	},
+#endif
 	{
 		.procname	= "somaxconn",
 		.data		= &init_net.core.sysctl_somaxconn,
@@ -706,6 +731,9 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
 	tbl = net->core.sysctl_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->core.sysctl_hdr);
 	BUG_ON(tbl == netns_core_table);
+#if IS_ENABLED(CONFIG_RPS)
+	kfree(net->core.rps_default_mask);
+#endif
 	kfree(tbl);
 }
 
@@ -716,10 +744,6 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
 
 static __init int sysctl_core_init(void)
 {
-#if IS_ENABLED(CONFIG_RPS)
-	cpumask_copy(&rps_default_mask, cpu_none_mask);
-#endif
-
 	register_net_sysctl(&init_net, "net/core", net_core_table);
 	return register_pernet_subsys(&sysctl_core_ops);
 }
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 2/2] self-tests: more rps self tests
  2023-02-15 18:33 [PATCH net-next 0/2] net: default_rps_mask follow-up Paolo Abeni
  2023-02-15 18:33 ` [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute Paolo Abeni
@ 2023-02-15 18:33 ` Paolo Abeni
  2023-02-15 19:29 ` [PATCH net-next 0/2] net: default_rps_mask follow-up Jakub Kicinski
  2 siblings, 0 replies; 7+ messages in thread
From: Paolo Abeni @ 2023-02-15 18:33 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Jakub Kicinski, Eric Dumazet, Shuah Khan

Explicitly check for child netns and main ns independency

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../testing/selftests/net/rps_default_mask.sh | 41 +++++++++++++------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index c81c0ac7ddfe..0fd0d2db3abc 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -8,7 +8,9 @@ ret=0
 [ $cpus -gt 2 ] || exit $ksft_skip
 
 readonly INITIAL_RPS_DEFAULT_MASK=$(cat /proc/sys/net/core/rps_default_mask)
-readonly NETNS="ns-$(mktemp -u XXXXXX)"
+readonly TAG="$(mktemp -u XXXXXX)"
+readonly VETH="veth${TAG}"
+readonly NETNS="ns-${TAG}"
 
 setup() {
 	ip netns add "${NETNS}"
@@ -21,11 +23,15 @@ cleanup() {
 }
 
 chk_rps() {
-	local rps_mask expected_rps_mask=$3
-	local dev_name=$2
+	local rps_mask expected_rps_mask=$4
+	local dev_name=$3
+	local netns=$2
+	local cmd="cat"
 	local msg=$1
 
-	rps_mask=$(ip netns exec $NETNS cat /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
+	[ -n "$netns" ] && cmd="ip netns exec $netns $cmd"
+
+	rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
 	printf "%-60s" "$msg"
 	if [ $rps_mask -eq $expected_rps_mask ]; then
 		echo "[ ok ]"
@@ -39,19 +45,30 @@ trap cleanup EXIT
 
 echo 0 > /proc/sys/net/core/rps_default_mask
 setup
-chk_rps "empty rps_default_mask" lo 0
+chk_rps "empty rps_default_mask" $NETNS lo 0
 cleanup
 
 echo 1 > /proc/sys/net/core/rps_default_mask
 setup
-chk_rps "non zero rps_default_mask" lo 1
+chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
 
 echo 3 > /proc/sys/net/core/rps_default_mask
-chk_rps "changing rps_default_mask dont affect existing netns" lo 1
+chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+ip link set dev $VETH up
+ip -n $NETNS link set dev $VETH up
+chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+ip netns del $NETNS
+
+setup
+chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
+
+ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
+chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
 
-ip -n $NETNS link add type veth
-ip -n $NETNS link set dev veth0 up
-ip -n $NETNS link set dev veth1 up
-chk_rps "changing rps_default_mask affect newly created devices" veth0 3
-chk_rps "changing rps_default_mask affect newly created devices[II]" veth1 3
 exit $ret
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 0/2] net: default_rps_mask follow-up
  2023-02-15 18:33 [PATCH net-next 0/2] net: default_rps_mask follow-up Paolo Abeni
  2023-02-15 18:33 ` [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute Paolo Abeni
  2023-02-15 18:33 ` [PATCH net-next 2/2] self-tests: more rps self tests Paolo Abeni
@ 2023-02-15 19:29 ` Jakub Kicinski
  2023-02-15 20:33   ` Paolo Abeni
  2 siblings, 1 reply; 7+ messages in thread
From: Jakub Kicinski @ 2023-02-15 19:29 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: netdev, David S. Miller, Eric Dumazet, Shuah Khan

On Wed, 15 Feb 2023 19:33:35 +0100 Paolo Abeni wrote:
> The first patch namespacify the setting: once proper isolation
> is in place in the main namespace, additional demux in the child
> namespaces will be redundant.

Would you mind spelling this out again for me? If I create a veth with
the peer in a netns, the local end will get one RPS mask and the netns
end will get a RPS mask from the netns. If the daemon is not aware of
having to configure RPS masks (which I believe was your use case) then
it won't set the default mask in the netns either.. so we assume veth
is first created and then moved, or we don't use veth, or I'm lost
completely?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 0/2] net: default_rps_mask follow-up
  2023-02-15 19:29 ` [PATCH net-next 0/2] net: default_rps_mask follow-up Jakub Kicinski
@ 2023-02-15 20:33   ` Paolo Abeni
  2023-02-15 23:05     ` Jakub Kicinski
  0 siblings, 1 reply; 7+ messages in thread
From: Paolo Abeni @ 2023-02-15 20:33 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, David S. Miller, Eric Dumazet, Shuah Khan

On Wed, 2023-02-15 at 11:29 -0800, Jakub Kicinski wrote:
> On Wed, 15 Feb 2023 19:33:35 +0100 Paolo Abeni wrote:
> > The first patch namespacify the setting: once proper isolation
> > is in place in the main namespace, additional demux in the child
> > namespaces will be redundant.
> 
> Would you mind spelling this out again for me? If I create a veth with
> the peer in a netns, the local end will get one RPS mask and the netns
> end will get a RPS mask from the netns. 

Which should be likely no RPS mask at all.

> If the daemon is not aware of having to configure RPS masks 
> (which I believe was your use case) then
> it won't set the default mask in the netns either.. 

The goal is exactly that: avoiding the long way via the daemon (or
other user-space tool) and the sysfs.

Without this patch the child-ns veth gets the same RPS setting as the
main veth one.

That is not needed, as every other devices forwarding packets to the
netns has proper isolation (RPS or IRQ affinity) already set. If the
child ns device RPS configuration is left unchanged, the incoming
packets in the child netns go through an unneeded RPS stage, which
could be a bad thing if the selected CPU is on a different NUMA node.


Please let me know if the above clarifies the scenario.

Cheers,

Paolo


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 0/2] net: default_rps_mask follow-up
  2023-02-15 20:33   ` Paolo Abeni
@ 2023-02-15 23:05     ` Jakub Kicinski
  0 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2023-02-15 23:05 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: netdev, David S. Miller, Eric Dumazet, Shuah Khan

On Wed, 15 Feb 2023 21:33:01 +0100 Paolo Abeni wrote:
> That is not needed, as every other devices forwarding packets to the
> netns has proper isolation (RPS or IRQ affinity) already set. If the
> child ns device RPS configuration is left unchanged, the incoming
> packets in the child netns go through an unneeded RPS stage, which
> could be a bad thing if the selected CPU is on a different NUMA node.

I see your point now. Must have been low on coffee in the morning.
Thanks!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute
  2023-02-15 18:33 ` [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute Paolo Abeni
@ 2023-02-16 13:06   ` kernel test robot
  0 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-02-16 13:06 UTC (permalink / raw)
  To: Paolo Abeni, netdev
  Cc: oe-kbuild-all, Jakub Kicinski, Eric Dumazet, Shuah Khan

Hi Paolo,

I love your patch! Yet something to improve:

[auto build test ERROR on net-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Paolo-Abeni/net-make-default_rps_mask-a-per-netns-attribute/20230216-023751
patch link:    https://lore.kernel.org/r/35bde791a3fd775f0a027bba04a549233b705494.1676484775.git.pabeni%40redhat.com
patch subject: [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute
config: parisc-defconfig (https://download.01.org/0day-ci/archive/20230216/202302162052.Pm80BWt6-lkp@intel.com/config)
compiler: hppa-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/7b0e8effb3c7ac131c0da6e37cc33322ff22e2e3
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Paolo-Abeni/net-make-default_rps_mask-a-per-netns-attribute/20230216-023751
        git checkout 7b0e8effb3c7ac131c0da6e37cc33322ff22e2e3
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=parisc olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=parisc SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302162052.Pm80BWt6-lkp@intel.com/

All errors (new ones prefixed by >>):

   net/core/sysctl_net_core.c: In function 'rps_default_mask_cow_alloc':
>> net/core/sysctl_net_core.c:85:33: error: passing argument 1 of 'zalloc_cpumask_var' from incompatible pointer type [-Werror=incompatible-pointer-types]
      85 |         if (!zalloc_cpumask_var(&rps_default_mask, GFP_KERNEL))
         |                                 ^~~~~~~~~~~~~~~~~
         |                                 |
         |                                 struct cpumask **
   In file included from include/linux/smp.h:13,
                    from include/linux/lockdep.h:14,
                    from include/linux/spinlock.h:63,
                    from include/linux/debugobjects.h:6,
                    from include/linux/timer.h:8,
                    from include/linux/workqueue.h:9,
                    from include/linux/bpf.h:10,
                    from include/linux/filter.h:9,
                    from net/core/sysctl_net_core.c:9:
   include/linux/cpumask.h:897:54: note: expected 'struct cpumask (*)[1]' but argument is of type 'struct cpumask **'
     897 | static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
         |                                       ~~~~~~~~~~~~~~~^~~~
   cc1: some warnings being treated as errors


vim +/zalloc_cpumask_var +85 net/core/sysctl_net_core.c

    77	
    78	static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
    79	{
    80		struct cpumask *rps_default_mask;
    81	
    82		if (net->core.rps_default_mask)
    83			return net->core.rps_default_mask;
    84	
  > 85		if (!zalloc_cpumask_var(&rps_default_mask, GFP_KERNEL))
    86			return NULL;
    87	
    88		/* pairs with READ_ONCE in rx_queue_default_mask() */
    89		WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
    90		return rps_default_mask;
    91	}
    92	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-02-16 13:07 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-02-15 18:33 [PATCH net-next 0/2] net: default_rps_mask follow-up Paolo Abeni
2023-02-15 18:33 ` [PATCH net-next 1/2] net: make default_rps_mask a per netns attribute Paolo Abeni
2023-02-16 13:06   ` kernel test robot
2023-02-15 18:33 ` [PATCH net-next 2/2] self-tests: more rps self tests Paolo Abeni
2023-02-15 19:29 ` [PATCH net-next 0/2] net: default_rps_mask follow-up Jakub Kicinski
2023-02-15 20:33   ` Paolo Abeni
2023-02-15 23:05     ` Jakub Kicinski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).