From: Alexander Duyck <alexander.h.duyck@intel.com>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, jeffrey.t.kirsher@intel.com,
edumazet@google.com, bhutchings@solarflare.com,
therbert@google.com, alexander.duyck@gmail.com
Subject: [RFC PATCH 04/10] net: Rewrite netif_set_xps_queue to address several issues
Date: Fri, 29 Jun 2012 17:16:33 -0700
Message-ID: <20120630001633.29939.76609.stgit@gitlad.jf.intel.com>
In-Reply-To: <20120630000652.29939.11108.stgit@gitlad.jf.intel.com>
This change is meant to address several issues I found within the
netif_set_xps_queue function.

If the allocation of one of the maps to be assigned to new_dev_maps
failed, we could end up with the device map in an inconsistent state,
since we would have already worked through a number of CPUs, adding or
removing the queue as we went. To address that I split the process into
several steps. The first step is just the allocation of updated maps for
any CPUs that will need a larger map to store the queue. By doing this
we can fail gracefully, without ever altering the contents of the
current device map.
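
To make the ordering concrete, here is a minimal sketch of the
allocate-then-commit pattern in plain userspace C. This is purely
illustrative; struct map and commit_new_maps are invented for the
example and are not part of the patch:

	#include <stdlib.h>
	#include <string.h>

	struct map {
		int len;
		int queues[];	/* flexible array of queue indices */
	};

	/*
	 * Stage a grown copy of every map first; only publish the staged
	 * copies once all allocations have succeeded, so a mid-loop
	 * failure never leaves live[] half-updated.
	 */
	static int commit_new_maps(struct map **live, int n, int new_queue)
	{
		struct map **staged;
		int i;

		staged = calloc(n, sizeof(*staged));
		if (!staged)
			return -1;

		/* Phase 1: build the staged copies; live[] is untouched. */
		for (i = 0; i < n; i++) {
			int len = live[i] ? live[i]->len : 0;

			staged[i] = malloc(sizeof(struct map) +
					   (len + 1) * sizeof(int));
			if (!staged[i])
				goto error;

			if (len)
				memcpy(staged[i]->queues, live[i]->queues,
				       len * sizeof(int));
			staged[i]->queues[len] = new_queue;
			staged[i]->len = len + 1;
		}

		/* Phase 2: commit; nothing in this loop can fail. */
		for (i = 0; i < n; i++) {
			free(live[i]);
			live[i] = staged[i];
		}
		free(staged);
		return 0;

	error:
		while (i--)
			free(staged[i]);
		free(staged);
		return -1;
	}

The real function follows the same shape, with the additional step of
publishing the new map array via rcu_assign_pointer() once everything
has been staged.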
The second issue I found was that we were always allocating a new
device map even if we were not adding any queues. I have updated the
code so that we only allocate a new device map when we are actually
adding queues; if no queues are being added we skip straight to the
removal process.
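
In the rewritten function this shows up as a simple early exit once the
allocation pass has run (abridged from the diff below):

	new_dev_maps = NULL;
	for_each_online_cpu(cpu) {
		if (!cpumask_test_cpu(cpu, mask))
			continue;
		/* allocate new_dev_maps and grow this CPU's map */
		...
	}
	if (!new_dev_maps)
		goto out_no_new_maps;	/* nothing to add, removal only */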
The last change I made was to reuse remove_xps_queue to remove the
queue from a CPU's map. This keeps the add and remove paths consistent
in how they manipulate the per-CPU maps.
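
For reference, the helper reused here was reworked in patch 03/10 of
this series; it has roughly the shape below. Treat this as a sketch for
context rather than the authoritative version:

	static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
						int cpu, u16 index)
	{
		struct xps_map *map = NULL;
		int pos;

		if (dev_maps)
			map = xmap_dereference(dev_maps->cpu_map[cpu]);

		for (pos = 0; map && pos < map->len; pos++) {
			if (map->queues[pos] != index)
				continue;

			if (map->len > 1) {
				/* swap the last entry into the hole */
				map->queues[pos] = map->queues[--map->len];
			} else {
				/* last queue on this CPU: drop the map */
				RCU_INIT_POINTER(dev_maps->cpu_map[cpu],
						 NULL);
				kfree_rcu(map, rcu);
				map = NULL;
			}
			break;
		}

		return map;
	}

A non-NULL return means the CPU still has at least one queue mapped,
which is how the "removes queue from unused CPUs" loop below decides
whether the device map is still active.
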
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
net/core/dev.c | 185 ++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 119 insertions(+), 66 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e259d4..5f0550b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1786,107 +1786,160 @@ out_no_maps:
mutex_unlock(&xps_map_mutex);
}
+static struct xps_map *expand_xps_map(struct xps_map *map,
+ int cpu, u16 index)
+{
+ struct xps_map *new_map;
+ int alloc_len = XPS_MIN_MAP_ALLOC;
+ int i, pos;
+
+ for (pos = 0; map && pos < map->len; pos++) {
+ if (map->queues[pos] != index)
+ continue;
+ return map;
+ }
+
+ /* Need to add queue to this CPU's existing map */
+ if (map) {
+ if (pos < map->alloc_len)
+ return map;
+
+ alloc_len = map->alloc_len * 2;
+ }
+
+ /* Need to allocate new map to store queue on this CPU's map */
+ new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!new_map)
+ return NULL;
+
+ for (i = 0; i < pos; i++)
+ new_map->queues[i] = map->queues[i];
+ new_map->alloc_len = alloc_len;
+ new_map->len = pos;
+
+ return new_map;
+}
+
int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
{
- int i, cpu, pos, map_len, alloc_len, need_set;
+ struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
struct xps_map *map, *new_map;
- struct xps_dev_maps *dev_maps, *new_dev_maps;
- int nonempty = 0;
- int numa_node_id = -2;
int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
-
- new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
- if (!new_dev_maps)
- return -ENOMEM;
+ int cpu, numa_node_id = -2;
+ bool active = false;
mutex_lock(&xps_map_mutex);
dev_maps = xmap_dereference(dev->xps_maps);
+ /* allocate memory for queue storage */
+ for_each_online_cpu(cpu) {
+ if (!cpumask_test_cpu(cpu, mask))
+ continue;
+
+ if (!new_dev_maps)
+ new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+ if (!new_dev_maps) {
+ mutex_unlock(&xps_map_mutex);
+ return -ENOMEM;
+ }
+
+ map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+ NULL;
+
+ map = expand_xps_map(map, cpu, index);
+ if (!map)
+ goto error;
+
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+ }
+
+ if (!new_dev_maps)
+ goto out_no_new_maps;
+
for_each_possible_cpu(cpu) {
- map = dev_maps ?
- xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
- new_map = map;
- if (map) {
- for (pos = 0; pos < map->len; pos++)
- if (map->queues[pos] == index)
- break;
- map_len = map->len;
- alloc_len = map->alloc_len;
- } else
- pos = map_len = alloc_len = 0;
+ if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
+ /* add queue to CPU maps */
+ int pos = 0;
- need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
+ map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+ while ((pos < map->len) && (map->queues[pos] != index))
+ pos++;
+
+ if (pos == map->len)
+ map->queues[map->len++] = index;
#ifdef CONFIG_NUMA
- if (need_set) {
if (numa_node_id == -2)
numa_node_id = cpu_to_node(cpu);
else if (numa_node_id != cpu_to_node(cpu))
numa_node_id = -1;
- }
#endif
- if (need_set && pos >= map_len) {
- /* Need to add queue to this CPU's map */
- if (map_len >= alloc_len) {
- alloc_len = alloc_len ?
- 2 * alloc_len : XPS_MIN_MAP_ALLOC;
- new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
- GFP_KERNEL,
- cpu_to_node(cpu));
- if (!new_map)
- goto error;
- new_map->alloc_len = alloc_len;
- for (i = 0; i < map_len; i++)
- new_map->queues[i] = map->queues[i];
- new_map->len = map_len;
- }
- new_map->queues[new_map->len++] = index;
- } else if (!need_set && pos < map_len) {
- /* Need to remove queue from this CPU's map */
- if (map_len > 1)
- new_map->queues[pos] =
- new_map->queues[--new_map->len];
- else
- new_map = NULL;
+ } else if (dev_maps) {
+ /* fill in the new device map from the old device map */
+ map = xmap_dereference(dev_maps->cpu_map[cpu]);
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
}
- RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
+
}
+ rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+
/* Cleanup old maps */
- for_each_possible_cpu(cpu) {
- map = dev_maps ?
- xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
- if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
- kfree_rcu(map, rcu);
- if (new_dev_maps->cpu_map[cpu])
- nonempty = 1;
- }
+ if (dev_maps) {
+ for_each_possible_cpu(cpu) {
+ new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+ map = xmap_dereference(dev_maps->cpu_map[cpu]);
+ if (map && map != new_map)
+ kfree_rcu(map, rcu);
+ }
- if (nonempty) {
- rcu_assign_pointer(dev->xps_maps, new_dev_maps);
- } else {
- kfree(new_dev_maps);
- RCU_INIT_POINTER(dev->xps_maps, NULL);
+ kfree_rcu(dev_maps, rcu);
}
- if (dev_maps)
- kfree_rcu(dev_maps, rcu);
+ dev_maps = new_dev_maps;
+ active = true;
+out_no_new_maps:
+ /* update Tx queue numa node */
netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
(numa_node_id >= 0) ? numa_node_id :
NUMA_NO_NODE);
+ if (!dev_maps)
+ goto out_no_maps;
+
+ /* removes queue from unused CPUs */
+ for_each_possible_cpu(cpu) {
+ if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
+ continue;
+
+ if (remove_xps_queue(dev_maps, cpu, index))
+ active = true;
+ }
+
+ /* free map if not active */
+ if (!active) {
+ RCU_INIT_POINTER(dev->xps_maps, NULL);
+ kfree_rcu(dev_maps, rcu);
+ }
+
+out_no_maps:
mutex_unlock(&xps_map_mutex);
return 0;
error:
+ /* remove any maps that we added */
+ for_each_possible_cpu(cpu) {
+ new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+ map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+ NULL;
+ if (new_map && new_map != map)
+ kfree(new_map);
+ }
+
mutex_unlock(&xps_map_mutex);
- if (new_dev_maps)
- for_each_possible_cpu(i)
- kfree(rcu_dereference_protected(
- new_dev_maps->cpu_map[i],
- 1));
kfree(new_dev_maps);
return -ENOMEM;
}