From: Jens Axboe <axboe@kernel.dk>
To: io-uring@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>, Zhang Qiang <qiang.zhang@windriver.com>
Subject: [PATCH 2/4] io-wq: re-set NUMA node affinities if CPUs come online
Date: Thu, 22 Oct 2020 16:24:45 -0600 [thread overview]
Message-ID: <20201022222447.62020-3-axboe@kernel.dk> (raw)
In-Reply-To: <20201022222447.62020-1-axboe@kernel.dk>
We correctly set io-wq NUMA node affinities when the io-wq context is
setup, but if an entire node CPU set is offlined and then brought back
online, the per node affinities are broken. Ensure that we set them
again whenever a CPU comes online. This ensures that we always track
the right node affinity. The usual cpuhp notifiers are used to drive it.
Reported-by: Zhang Qiang <qiang.zhang@windriver.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/io-wq.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 54 insertions(+), 4 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 4012ff541b7b..d3165ce339c2 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -19,6 +19,7 @@
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>
+#include <linux/cpu.h>
#include "io-wq.h"
@@ -123,9 +124,13 @@ struct io_wq {
refcount_t refs;
struct completion done;
+ struct hlist_node cpuhp_node;
+
refcount_t use_refs;
};
+static enum cpuhp_state io_wq_online;
+
static bool io_worker_get(struct io_worker *worker)
{
return refcount_inc_not_zero(&worker->ref);
@@ -1091,10 +1096,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM);
wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
- if (!wq->wqes) {
- kfree(wq);
- return ERR_PTR(-ENOMEM);
- }
+ if (!wq->wqes)
+ goto err_wq;
+
+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+ if (ret)
+ goto err_wqes;
wq->free_work = data->free_work;
wq->do_work = data->do_work;
@@ -1102,6 +1109,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
/* caller must already hold a reference to this */
wq->user = data->user;
+ ret = -ENOMEM;
for_each_node(node) {
struct io_wqe *wqe;
int alloc_node = node;
@@ -1145,9 +1153,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
ret = PTR_ERR(wq->manager);
complete(&wq->done);
err:
+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node)
kfree(wq->wqes[node]);
+err_wqes:
kfree(wq->wqes);
+err_wq:
kfree(wq);
return ERR_PTR(ret);
}
@@ -1164,6 +1175,8 @@ static void __io_wq_destroy(struct io_wq *wq)
{
int node;
+ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+
set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager)
kthread_stop(wq->manager);
@@ -1191,3 +1204,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
{
return wq->manager;
}
+
+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
+{
+ struct task_struct *task = worker->task;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
+ task->flags |= PF_NO_SETAFFINITY;
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return false;
+}
+
+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+ int i;
+
+ rcu_read_lock();
+ for_each_node(i)
+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+ rcu_read_unlock();
+ return 0;
+}
+
+static __init int io_wq_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
+ io_wq_cpu_online, NULL);
+ if (ret < 0)
+ return ret;
+ io_wq_online = ret;
+ return 0;
+}
+subsys_initcall(io_wq_init);
--
2.29.0
next prev parent reply other threads:[~2020-10-22 22:24 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-22 22:24 Fixes for 5.10 v2 Jens Axboe
2020-10-22 22:24 ` [PATCH 1/4] io_uring: unify fsize with def->work_flags Jens Axboe
2020-10-22 22:24 ` Jens Axboe [this message]
2020-10-22 22:24 ` [PATCH 3/4] io_uring: make loop_rw_iter() use original user supplied pointers Jens Axboe
2020-10-22 22:24 ` [PATCH 4/4] splice: change exported internal do_splice() helper to take kernel offset Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201022222447.62020-3-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=io-uring@vger.kernel.org \
--cc=qiang.zhang@windriver.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.