From: Andrei Vagin
Subject: Re: [net-next, v6, 6/7] net-sysfs: Add interface for Rx queue(s) map per Tx queue
Date: Wed, 1 Aug 2018 17:11:59 -0700
Message-ID: <20180802001157.GA29171@outlook.office365.com>
In-Reply-To: <4b7f5d42-1b81-f095-f313-f43e41cf8601@intel.com>
References: <153033282719.8297.790755582693609184.stgit@anamhost.jf.intel.com>
 <20180704072048.GA29107@outlook.office365.com>
 <4b7f5d42-1b81-f095-f313-f43e41cf8601@intel.com>
To: "Nambiar, Amritha"
Cc: netdev@vger.kernel.org, davem@davemloft.net, alexander.h.duyck@intel.com,
 willemdebruijn.kernel@gmail.com, sridhar.samudrala@intel.com,
 alexander.duyck@gmail.com, edumazet@google.com, hannes@stressinduktion.org,
 tom@herbertland.com, tom@quantonium.net, jasowang@redhat.com,
 gaowanlong@cn.fujitsu.com, "Michael S. Tsirkin",
 virtualization@lists.linux-foundation.org

On Tue, Jul 10, 2018 at 07:28:49PM -0700, Nambiar, Amritha wrote:
> With this patch series, I introduced a static_key for XPS maps
> (xps_needed), so static_key_slow_inc() is used to switch branches. The
> definition of static_key_slow_inc() has cpus_read_lock in place. In the
> virtio_net driver, XPS queues are initialized after setting the
> queue:cpu affinity in virtnet_set_affinity(), which is already protected
> within cpus_read_lock. Hence the warning here about trying to acquire
> cpus_read_lock when it is already held.
>
> A quick fix for this would be to just extract netif_set_xps_queue() out
> of the lock by simply wrapping it with another put/get_online_cpus pair
> (unlock right before and re-lock right after).

virtnet_set_affinity() is called from virtnet_cpu_online(), which is
called under cpus_read_lock too. It looks like we now can't call
netif_set_xps_queue() from CPU hotplug callbacks. I can suggest a very
straightforward fix for this problem. The patch is attached.

> But this may not be a clean solution. It'd help if I could get
> suggestions on what would be a clean option to fix this without
> extensively changing the code in virtio_net. Is it mandatory to protect
> the affinitization with read_lock? I don't see a similar lock in other
> drivers while setting the affinity. I understand this warning should go
> away, but isn't it safe to have multiple readers?
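To spell out why the warning fires, here is a rough, untested sketch. The
names example_key, do_update() and example_cpu_online() are made up for
illustration and are not code from the series; the point is only the
contrast between the plain and the _cpuslocked static_key helpers:

#include <linux/cpu.h>
#include <linux/jump_label.h>

/* made-up example key, not from the series */
static struct static_key example_key = STATIC_KEY_INIT_FALSE;

/* Normal process context: static_key_slow_inc() takes cpus_read_lock()
 * internally, which is fine because the caller does not hold it.
 */
static void do_update(void)
{
	static_key_slow_inc(&example_key);
}

/* A CPU hotplug callback already runs under cpus_read_lock(), so calling
 * static_key_slow_inc() from here (directly, or indirectly through
 * netif_set_xps_queue()) tries to take the same lock again and triggers
 * the warning.  The _cpuslocked variant assumes the lock is already held
 * and avoids that.
 */
static int example_cpu_online(unsigned int cpu)
{
	static_key_slow_inc_cpuslocked(&example_key);
	return 0;
}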
>
> > On Fri, Jun 29, 2018 at 09:27:07PM -0700, Amritha Nambiar wrote:

[ attachment: patch ]

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1880c86e84b4..ebb5470090e5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1941,7 +1941,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 	for_each_online_cpu(cpu) {
 		virtqueue_set_affinity(vi->rq[i].vq, cpu);
 		virtqueue_set_affinity(vi->sq[i].vq, cpu);
-		netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
+		__netif_set_xps_queue(vi->dev, cpumask_bits(cpumask_of(cpu)), i, false, true);
 		i++;
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c917467a2c7..37c88e566806 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3320,7 +3320,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index);
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
-			  u16 index, bool is_rxqs_map);
+			  u16 index, bool is_rxqs_map, bool cpuslocked);
 
 /**
  * netif_attr_test_mask - Test a CPU or Rx queue set in a mask
diff --git a/net/core/dev.c b/net/core/dev.c
index 38b0c414d780..96ade614a23e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2176,6 +2176,7 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 	if (!static_key_false(&xps_needed))
 		return;
 
+	cpus_read_lock();
 	mutex_lock(&xps_map_mutex);
 
 	if (static_key_false(&xps_rxqs_needed)) {
@@ -2199,10 +2200,11 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 
 out_no_maps:
 	if (static_key_enabled(&xps_rxqs_needed))
-		static_key_slow_dec(&xps_rxqs_needed);
+		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
 
-	static_key_slow_dec(&xps_needed);
+	static_key_slow_dec_cpuslocked(&xps_needed);
 	mutex_unlock(&xps_map_mutex);
+	cpus_read_unlock();
 }
 
 static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
@@ -2251,7 +2253,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
 }
 
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
-			  u16 index, bool is_rxqs_map)
+			  u16 index, bool is_rxqs_map, bool cpuslocked)
 {
 	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
@@ -2275,6 +2277,9 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		return -EINVAL;
 	}
 
+	if (!cpuslocked)
+		cpus_read_lock();
+
 	mutex_lock(&xps_map_mutex);
 	if (is_rxqs_map) {
 		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
@@ -2317,9 +2322,9 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
-	static_key_slow_inc(&xps_needed);
+	static_key_slow_inc_cpuslocked(&xps_needed);
 	if (is_rxqs_map)
-		static_key_slow_inc(&xps_rxqs_needed);
+		static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
 
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 	     j < nr_ids;) {
@@ -2427,6 +2432,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
+	if (!cpuslocked)
+		cpus_read_unlock();
 
 	return 0;
 error:
@@ -2444,15 +2451,18 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	}
 	mutex_unlock(&xps_map_mutex);
+	if (!cpuslocked)
+		cpus_read_unlock();
 
 	kfree(new_dev_maps);
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(__netif_set_xps_queue);
 
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index)
 {
-	return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+	return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false, false);
 }
 EXPORT_SYMBOL(netif_set_xps_queue);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 0a95bcf64cdc..06a141445d80 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1400,7 +1400,7 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
 		return err;
 	}
 
-	err = __netif_set_xps_queue(dev, mask, index, true);
+	err = __netif_set_xps_queue(dev, mask, index, true, false);
 	kfree(mask);
 	return err ? : len;
 }
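For completeness, a rough, untested sketch of how a driver would use the
newly exported helper from a context that already holds cpus_read_lock(),
e.g. a CPU hotplug callback. example_set_affinity() is a made-up helper
modelled on the virtnet_set_affinity() hunk above, not part of the patch:

#include <linux/cpumask.h>
#include <linux/netdevice.h>

/* Made-up example: called with cpus_read_lock() already held (e.g. from a
 * CPU hotplug callback), so pass cpuslocked = true and
 * __netif_set_xps_queue() will not try to take the lock again.
 * is_rxqs_map stays false because this installs a CPU map, not an Rx
 * queue map.
 */
static void example_set_affinity(struct net_device *dev)
{
	unsigned int cpu;
	u16 txq = 0;

	for_each_online_cpu(cpu)
		__netif_set_xps_queue(dev, cpumask_bits(cpumask_of(cpu)),
				      txq++, false, true);
}

Callers that do not hold the lock keep using netif_set_xps_queue(), which
with this patch simply passes cpuslocked = false.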