[PATCH 7/7] RCU, workqueue: Implement rcu_work

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, jannh@google.com,
	paulmck@linux.vnet.ibm.com, bcrl@kvack.org,
	viro@zeniv.linux.org.uk, kent.overstreet@gmail.com
Cc: security@kernel.org, linux-kernel@vger.kernel.org,
	kernel-team@fb.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 7/7] RCU, workqueue: Implement rcu_work
Date: Tue,  6 Mar 2018 09:33:16 -0800	[thread overview]
Message-ID: <20180306173316.3088458-7-tj@kernel.org> (raw)
In-Reply-To: <20180306173316.3088458-1-tj@kernel.org>

There are cases where RCU callback needs to be bounced to a sleepable
context.  This is currently done by the RCU callback queueing a work
item, which can be cumbersome to write and confusing to read.

This patch introduces rcu_work, a workqueue work variant which gets
executed after a RCU grace period, and converts the open coded
bouncing in fs/aio and kernel/cgroup.

v2: Use rcu_barrier() instead of synchronize_rcu() to wait for
    completion of previously queued rcu callback as per Paul.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c                    | 21 +++++-------------
 include/linux/cgroup-defs.h |  2 +-
 include/linux/workqueue.h   | 38 +++++++++++++++++++++++++++++++
 kernel/cgroup/cgroup.c      | 21 ++++++------------
 kernel/workqueue.c          | 54 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 6bcd3fb..88d7927 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -115,8 +115,7 @@ struct kioctx {
 	struct page		**ring_pages;
 	long			nr_pages;
 
-	struct rcu_head		free_rcu;
-	struct work_struct	free_work;	/* see free_ioctx() */
+	struct rcu_work		free_rwork;	/* see free_ioctx() */
 
 	/*
 	 * signals when all in-flight requests are done
@@ -592,13 +591,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
 /*
  * free_ioctx() should be RCU delayed to synchronize against the RCU
  * protected lookup_ioctx() and also needs process context to call
- * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
- * ->free_work.
+ * aio_free_ring().  Use rcu_work.
  */
 static void free_ioctx(struct work_struct *work)
 {
-	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-
+	struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
+					  free_rwork);
 	pr_debug("freeing %p\n", ctx);
 
 	aio_free_ring(ctx);
@@ -608,14 +606,6 @@ static void free_ioctx(struct work_struct *work)
 	kmem_cache_free(kioctx_cachep, ctx);
 }
 
-static void free_ioctx_rcufn(struct rcu_head *head)
-{
-	struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
-
-	INIT_WORK(&ctx->free_work, free_ioctx);
-	schedule_work(&ctx->free_work);
-}
-
 static void free_ioctx_reqs(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -625,7 +615,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 		complete(&ctx->rq_wait->comp);
 
 	/* Synchronize against RCU protected table->table[] dereferences */
-	call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
+	INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
+	queue_rcu_work(system_wq, &ctx->free_rwork);
 }
 
 /*
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 9f242b8..92d7640 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -151,8 +151,8 @@ struct cgroup_subsys_state {
 	atomic_t online_cnt;
 
 	/* percpu_ref killing and RCU release */
-	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
+	struct rcu_work destroy_rwork;
 
 	/*
 	 * PI: the parent css.	Placed here for cache proximity to following
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index bc0cda1..b39f3a4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -13,6 +13,7 @@
 #include <linux/threads.h>
 #include <linux/atomic.h>
 #include <linux/cpumask.h>
+#include <linux/rcupdate.h>
 
 struct workqueue_struct;
 
@@ -120,6 +121,15 @@ struct delayed_work {
 	int cpu;
 };
 
+struct rcu_work {
+	struct work_struct work;
+	struct rcu_head rcu;
+
+	/* target workqueue and CPU ->rcu uses to queue ->work */
+	struct workqueue_struct *wq;
+	int cpu;
+};
+
 /**
  * struct workqueue_attrs - A struct for workqueue attributes.
  *
@@ -151,6 +161,11 @@ static inline struct delayed_work *to_delayed_work(struct work_struct *work)
 	return container_of(work, struct delayed_work, work);
 }
 
+static inline struct rcu_work *to_rcu_work(struct work_struct *work)
+{
+	return container_of(work, struct rcu_work, work);
+}
+
 struct execute_work {
 	struct work_struct work;
 };
@@ -266,6 +281,12 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func)			\
 	__INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE)
 
+#define INIT_RCU_WORK(_work, _func)					\
+	INIT_WORK(&(_work)->work, (_func))
+
+#define INIT_RCU_WORK_ONSTACK(_work, _func)				\
+	INIT_WORK_ONSTACK(&(_work)->work, (_func))
+
 /**
  * work_pending - Find out whether a work item is currently pending
  * @work: The work item in question
@@ -447,6 +468,8 @@ extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *dwork, unsigned long delay);
+extern bool queue_rcu_work_on(int cpu, struct workqueue_struct *wq,
+			struct rcu_work *rwork);
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void drain_workqueue(struct workqueue_struct *wq);
@@ -463,6 +486,8 @@ extern bool flush_delayed_work(struct delayed_work *dwork);
 extern bool cancel_delayed_work(struct delayed_work *dwork);
 extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
 
+extern bool flush_rcu_work(struct rcu_work *rwork);
+
 extern void workqueue_set_max_active(struct workqueue_struct *wq,
 				     int max_active);
 extern struct work_struct *current_work(void);
@@ -520,6 +545,19 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq,
 }
 
 /**
+ * queue_rcu_work - queue work on a workqueue after a RCU grace period
+ * @wq: workqueue to use
+ * @rwork: RCU work to queue
+ *
+ * Equivalent to queue_rcu_work_on() but tries to use the local CPU.
+ */
+static inline bool queue_rcu_work(struct workqueue_struct *wq,
+				  struct rcu_work *rwork)
+{
+	return queue_rcu_work_on(WORK_CPU_UNBOUND, wq, rwork);
+}
+
+/**
  * schedule_work_on - put work task on a specific cpu
  * @cpu: cpu to put the work task on
  * @work: job to be done
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8cda3bc..4c5d4ca0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4514,10 +4514,10 @@ static struct cftype cgroup_base_files[] = {
  * and thus involve punting to css->destroy_work adding two additional
  * steps to the already complex sequence.
  */
-static void css_free_work_fn(struct work_struct *work)
+static void css_free_rwork_fn(struct work_struct *work)
 {
-	struct cgroup_subsys_state *css =
-		container_of(work, struct cgroup_subsys_state, destroy_work);
+	struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
+				struct cgroup_subsys_state, destroy_rwork);
 	struct cgroup_subsys *ss = css->ss;
 	struct cgroup *cgrp = css->cgroup;
 
@@ -4563,15 +4563,6 @@ static void css_free_work_fn(struct work_struct *work)
 	}
 }
 
-static void css_free_rcu_fn(struct rcu_head *rcu_head)
-{
-	struct cgroup_subsys_state *css =
-		container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
-
-	INIT_WORK(&css->destroy_work, css_free_work_fn);
-	queue_work(cgroup_destroy_wq, &css->destroy_work);
-}
-
 static void css_release_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
@@ -4621,7 +4612,8 @@ static void css_release_work_fn(struct work_struct *work)
 
 	mutex_unlock(&cgroup_mutex);
 
-	call_rcu(&css->rcu_head, css_free_rcu_fn);
+	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
+	queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
 }
 
 static void css_release(struct percpu_ref *ref)
@@ -4755,7 +4747,8 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 err_list_del:
 	list_del_rcu(&css->sibling);
 err_free_css:
-	call_rcu(&css->rcu_head, css_free_rcu_fn);
+	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
+	queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
 	return ERR_PTR(err);
 }
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index bb9a519..e26c2f4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1604,6 +1604,40 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(mod_delayed_work_on);
 
+static void rcu_work_rcufn(struct rcu_head *rcu)
+{
+	struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
+
+	/* read the comment in __queue_work() */
+	local_irq_disable();
+	__queue_work(rwork->cpu, rwork->wq, &rwork->work);
+	local_irq_enable();
+}
+
+/**
+ * queue_rcu_work_on - queue work on specific CPU after a RCU grace period
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @rwork: work to queue
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_rcu_work_on(int cpu, struct workqueue_struct *wq,
+		       struct rcu_work *rwork)
+{
+	struct work_struct *work = &rwork->work;
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+		rwork->wq = wq;
+		rwork->cpu = cpu;
+		call_rcu(&rwork->rcu, rcu_work_rcufn);
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(queue_rcu_work_on);
+
 /**
  * worker_enter_idle - enter idle state
  * @worker: worker which is entering idle state
@@ -3001,6 +3035,26 @@ bool flush_delayed_work(struct delayed_work *dwork)
 }
 EXPORT_SYMBOL(flush_delayed_work);
 
+/**
+ * flush_rcu_work - wait for a rwork to finish executing the last queueing
+ * @rwork: the rcu work to flush
+ *
+ * Return:
+ * %true if flush_rcu_work() waited for the work to finish execution,
+ * %false if it was already idle.
+ */
+bool flush_rcu_work(struct rcu_work *rwork)
+{
+	if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
+		rcu_barrier();
+		flush_work(&rwork->work);
+		return true;
+	} else {
+		return flush_work(&rwork->work);
+	}
+}
+EXPORT_SYMBOL(flush_rcu_work);
+
 static bool __cancel_work(struct work_struct *work, bool is_dwork)
 {
 	unsigned long flags;
-- 
2.9.5

next prev parent reply	other threads:[~2018-03-06 17:33 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-06 17:26 [PATCHSET] percpu_ref, RCU: Audit RCU usages in percpu_ref users Tejun Heo
2018-03-06 17:33 ` [PATCH 1/7] fs/aio: Add explicit RCU grace period when freeing kioctx Tejun Heo
2018-03-06 17:33   ` [PATCH 2/7] fs/aio: Use RCU accessors for kioctx_table->table[] Tejun Heo
2018-03-06 17:33   ` [PATCH 3/7] RDMAVT: Fix synchronization around percpu_ref Tejun Heo
2018-03-07 15:39     ` Dennis Dalessandro
2018-03-06 17:33   ` [PATCH 4/7] HMM: Remove superflous RCU protection around radix tree lookup Tejun Heo
2018-03-06 17:33     ` Tejun Heo
2018-03-06 17:59     ` Jerome Glisse
2018-03-06 17:59       ` Jerome Glisse
2018-03-06 17:33   ` [PATCH 5/7] block: Remove superflous rcu_read_[un]lock_sched() in blk_queue_enter() Tejun Heo
2018-03-06 17:52     ` Bart Van Assche
2018-03-14 18:46       ` tj
2018-03-14 20:05         ` Bart Van Assche
2018-03-14 20:08           ` Peter Zijlstra
2018-03-14 20:14             ` Bart Van Assche
2018-03-06 17:33   ` [PATCH 6/7] percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods Tejun Heo
2018-03-06 17:33   ` Tejun Heo [this message]
2018-03-06 18:30     ` [PATCH 7/7] RCU, workqueue: Implement rcu_work Linus Torvalds
2018-03-09 15:37       ` Tejun Heo
2018-03-07  2:49     ` Lai Jiangshan
2018-03-07 14:54       ` Paul E. McKenney
2018-03-07 16:23         ` Peter Zijlstra
2018-03-07 17:58           ` Paul E. McKenney
2018-03-08  0:29         ` Lai Jiangshan
2018-03-08 17:28           ` Paul E. McKenney
2018-03-09 16:21           ` Tejun Heo

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:6bcd3fb dfblob:88d7927 dfblob:9f242b8 dfblob:92d7640
dfblob:bc0cda1 dfblob:b39f3a4 dfblob:8cda3bc dfblob:4c5d4ca
dfblob:bb9a519 dfblob:e26c2f4 )
 OR (
bs:"[PATCH 7/7] RCU, workqueue: Implement rcu_work" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180306173316.3088458-7-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=bcrl@kvack.org \
    --cc=jannh@google.com \
    --cc=kent.overstreet@gmail.com \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=security@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.