From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, jannh@google.com,
paulmck@linux.vnet.ibm.com, bcrl@kvack.org,
viro@zeniv.linux.org.uk, kent.overstreet@gmail.com
Cc: security@kernel.org, linux-kernel@vger.kernel.org,
kernel-team@fb.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 7/7] RCU, workqueue: Implement rcu_work
Date: Tue, 6 Mar 2018 09:33:16 -0800 [thread overview]
Message-ID: <20180306173316.3088458-7-tj@kernel.org> (raw)
In-Reply-To: <20180306173316.3088458-1-tj@kernel.org>
There are cases where an RCU callback needs to be bounced to a sleepable
context. This is currently done by having the RCU callback queue a work
item, which can be cumbersome to write and confusing to read.
This patch introduces rcu_work, a workqueue work variant which gets
executed after an RCU grace period, and converts the open-coded
bouncing in fs/aio and kernel/cgroup.
v2: Use rcu_barrier() instead of synchronize_rcu() to wait for
completion of previously queued RCU callbacks as per Paul.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
fs/aio.c | 21 +++++-------------
include/linux/cgroup-defs.h | 2 +-
include/linux/workqueue.h | 38 +++++++++++++++++++++++++++++++
kernel/cgroup/cgroup.c | 21 ++++++------------
kernel/workqueue.c | 54 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 106 insertions(+), 30 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 6bcd3fb..88d7927 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -115,8 +115,7 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct rcu_head free_rcu;
- struct work_struct free_work; /* see free_ioctx() */
+ struct rcu_work free_rwork; /* see free_ioctx() */
/*
* signals when all in-flight requests are done
@@ -592,13 +591,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
/*
* free_ioctx() should be RCU delayed to synchronize against the RCU
* protected lookup_ioctx() and also needs process context to call
- * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
- * ->free_work.
+ * aio_free_ring(). Use rcu_work.
*/
static void free_ioctx(struct work_struct *work)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-
+ struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
+ free_rwork);
pr_debug("freeing %p\n", ctx);
aio_free_ring(ctx);
@@ -608,14 +606,6 @@ static void free_ioctx(struct work_struct *work)
kmem_cache_free(kioctx_cachep, ctx);
}
-static void free_ioctx_rcufn(struct rcu_head *head)
-{
- struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
-
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
-}
-
static void free_ioctx_reqs(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -625,7 +615,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
complete(&ctx->rq_wait->comp);
/* Synchronize against RCU protected table->table[] dereferences */
- call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
+ INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
+ queue_rcu_work(system_wq, &ctx->free_rwork);
}
/*
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 9f242b8..92d7640 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -151,8 +151,8 @@ struct cgroup_subsys_state {
atomic_t online_cnt;
/* percpu_ref killing and RCU release */
- struct rcu_head rcu_head;
struct work_struct destroy_work;
+ struct rcu_work destroy_rwork;
/*
* PI: the parent css. Placed here for cache proximity to following
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index bc0cda1..b39f3a4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -13,6 +13,7 @@
#include <linux/threads.h>
#include <linux/atomic.h>
#include <linux/cpumask.h>
+#include <linux/rcupdate.h>
struct workqueue_struct;
@@ -120,6 +121,15 @@ struct delayed_work {
int cpu;
};
+struct rcu_work {
+ struct work_struct work;
+ struct rcu_head rcu;
+
+ /* target workqueue and CPU ->rcu uses to queue ->work */
+ struct workqueue_struct *wq;
+ int cpu;
+};
+
/**
* struct workqueue_attrs - A struct for workqueue attributes.
*
@@ -151,6 +161,11 @@ static inline struct delayed_work *to_delayed_work(struct work_struct *work)
return container_of(work, struct delayed_work, work);
}
+static inline struct rcu_work *to_rcu_work(struct work_struct *work)
+{
+ return container_of(work, struct rcu_work, work);
+}
+
struct execute_work {
struct work_struct work;
};
@@ -266,6 +281,12 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
#define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func) \
__INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE)
+#define INIT_RCU_WORK(_work, _func) \
+ INIT_WORK(&(_work)->work, (_func))
+
+#define INIT_RCU_WORK_ONSTACK(_work, _func) \
+ INIT_WORK_ONSTACK(&(_work)->work, (_func))
+
/**
* work_pending - Find out whether a work item is currently pending
* @work: The work item in question
@@ -447,6 +468,8 @@ extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *work, unsigned long delay);
extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay);
+extern bool queue_rcu_work_on(int cpu, struct workqueue_struct *wq,
+ struct rcu_work *rwork);
extern void flush_workqueue(struct workqueue_struct *wq);
extern void drain_workqueue(struct workqueue_struct *wq);
@@ -463,6 +486,8 @@ extern bool flush_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
+extern bool flush_rcu_work(struct rcu_work *rwork);
+
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
extern struct work_struct *current_work(void);
@@ -520,6 +545,19 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq,
}
/**
+ * queue_rcu_work - queue work on a workqueue after an RCU grace period
+ * @wq: workqueue to use
+ * @rwork: RCU work to queue
+ *
+ * Equivalent to queue_rcu_work_on() but tries to use the local CPU.
+ */
+static inline bool queue_rcu_work(struct workqueue_struct *wq,
+ struct rcu_work *rwork)
+{
+ return queue_rcu_work_on(WORK_CPU_UNBOUND, wq, rwork);
+}
+
+/**
* schedule_work_on - put work task on a specific cpu
* @cpu: cpu to put the work task on
* @work: job to be done
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8cda3bc..4c5d4ca0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4514,10 +4514,10 @@ static struct cftype cgroup_base_files[] = {
* and thus involve punting to css->destroy_work adding two additional
* steps to the already complex sequence.
*/
-static void css_free_work_fn(struct work_struct *work)
+static void css_free_rwork_fn(struct work_struct *work)
{
- struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, destroy_work);
+ struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
+ struct cgroup_subsys_state, destroy_rwork);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
@@ -4563,15 +4563,6 @@ static void css_free_work_fn(struct work_struct *work)
}
}
-static void css_free_rcu_fn(struct rcu_head *rcu_head)
-{
- struct cgroup_subsys_state *css =
- container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
-
- INIT_WORK(&css->destroy_work, css_free_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
-}
-
static void css_release_work_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
@@ -4621,7 +4612,8 @@ static void css_release_work_fn(struct work_struct *work)
mutex_unlock(&cgroup_mutex);
- call_rcu(&css->rcu_head, css_free_rcu_fn);
+ INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
+ queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
}
static void css_release(struct percpu_ref *ref)
@@ -4755,7 +4747,8 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
err_list_del:
list_del_rcu(&css->sibling);
err_free_css:
- call_rcu(&css->rcu_head, css_free_rcu_fn);
+ INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
+ queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
return ERR_PTR(err);
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index bb9a519..e26c2f4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1604,6 +1604,40 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL_GPL(mod_delayed_work_on);
+static void rcu_work_rcufn(struct rcu_head *rcu)
+{
+ struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
+
+ /* read the comment in __queue_work() */
+ local_irq_disable();
+ __queue_work(rwork->cpu, rwork->wq, &rwork->work);
+ local_irq_enable();
+}
+
+/**
+ * queue_rcu_work_on - queue work on specific CPU after an RCU grace period
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @rwork: work to queue
+ *
+ * Return: %false if @rwork was already on a queue, %true otherwise.
+ */
+bool queue_rcu_work_on(int cpu, struct workqueue_struct *wq,
+ struct rcu_work *rwork)
+{
+ struct work_struct *work = &rwork->work;
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ rwork->wq = wq;
+ rwork->cpu = cpu;
+ call_rcu(&rwork->rcu, rcu_work_rcufn);
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(queue_rcu_work_on);
+
/**
* worker_enter_idle - enter idle state
* @worker: worker which is entering idle state
@@ -3001,6 +3035,26 @@ bool flush_delayed_work(struct delayed_work *dwork)
}
EXPORT_SYMBOL(flush_delayed_work);
+/**
+ * flush_rcu_work - wait for a rwork to finish executing the last queueing
+ * @rwork: the rcu work to flush
+ *
+ * Return:
+ * %true if flush_rcu_work() waited for the work to finish execution,
+ * %false if it was already idle.
+ */
+bool flush_rcu_work(struct rcu_work *rwork)
+{
+ if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
+ rcu_barrier();
+ flush_work(&rwork->work);
+ return true;
+ } else {
+ return flush_work(&rwork->work);
+ }
+}
+EXPORT_SYMBOL(flush_rcu_work);
+
static bool __cancel_work(struct work_struct *work, bool is_dwork)
{
unsigned long flags;
--
2.9.5
next prev parent reply other threads:[~2018-03-06 17:33 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-06 17:26 [PATCHSET] percpu_ref, RCU: Audit RCU usages in percpu_ref users Tejun Heo
2018-03-06 17:33 ` [PATCH 1/7] fs/aio: Add explicit RCU grace period when freeing kioctx Tejun Heo
2018-03-06 17:33 ` [PATCH 2/7] fs/aio: Use RCU accessors for kioctx_table->table[] Tejun Heo
2018-03-06 17:33 ` [PATCH 3/7] RDMAVT: Fix synchronization around percpu_ref Tejun Heo
2018-03-07 15:39 ` Dennis Dalessandro
2018-03-06 17:33 ` [PATCH 4/7] HMM: Remove superflous RCU protection around radix tree lookup Tejun Heo
2018-03-06 17:33 ` Tejun Heo
2018-03-06 17:59 ` Jerome Glisse
2018-03-06 17:59 ` Jerome Glisse
2018-03-06 17:33 ` [PATCH 5/7] block: Remove superflous rcu_read_[un]lock_sched() in blk_queue_enter() Tejun Heo
2018-03-06 17:52 ` Bart Van Assche
2018-03-14 18:46 ` tj
2018-03-14 20:05 ` Bart Van Assche
2018-03-14 20:08 ` Peter Zijlstra
2018-03-14 20:14 ` Bart Van Assche
2018-03-06 17:33 ` [PATCH 6/7] percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods Tejun Heo
2018-03-06 17:33 ` Tejun Heo [this message]
2018-03-06 18:30 ` [PATCH 7/7] RCU, workqueue: Implement rcu_work Linus Torvalds
2018-03-09 15:37 ` Tejun Heo
2018-03-07 2:49 ` Lai Jiangshan
2018-03-07 14:54 ` Paul E. McKenney
2018-03-07 16:23 ` Peter Zijlstra
2018-03-07 17:58 ` Paul E. McKenney
2018-03-08 0:29 ` Lai Jiangshan
2018-03-08 17:28 ` Paul E. McKenney
2018-03-09 16:21 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180306173316.3088458-7-tj@kernel.org \
--to=tj@kernel.org \
--cc=bcrl@kvack.org \
--cc=jannh@google.com \
--cc=kent.overstreet@gmail.com \
--cc=kernel-team@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=security@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.