All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: axboe@kernel.dk, vgoyal@redhat.com
Cc: ctalbott@google.com, rni@google.com,
	linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 01/10] block, cfq: replace current_io_context() with create_io_context()
Date: Sat, 29 Oct 2011 16:49:38 -0700	[thread overview]
Message-ID: <1319932187-7631-2-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1319932187-7631-1-git-send-email-tj@kernel.org>

When called under queue_lock, current_io_context() triggers lockdep
warning if it hits allocation path.  This is because io_context
installation is protected by task_lock which is not IRQ safe, so it
triggers irq-unsafe-lock -> irq -> irq-safe-lock -> irq-unsafe-lock
deadlock warning.

Given the restriction, accessor + creator rolled into one doesn't work
too well.  Drop current_io_context() and let the users access
task->io_context directly inside queue_lock combined with explicit
creation using create_io_context().

Future ioc updates will further consolidate ioc access and the create
interface will be unexported.

While at it, relocate ioc internal interface declarations in blk.h and
add section comments before and after.

This patch does not introduce functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c    |   25 ++++++++++++++++----
 block/blk-ioc.c     |   62 ++++++++++++++-------------------------------------
 block/blk.h         |   36 +++++++++++++++++++++++++++--
 block/cfq-iosched.c |    2 +-
 4 files changed, 71 insertions(+), 54 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 39f1289..74b44ab 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -766,9 +766,12 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = NULL;
+	struct io_context *ioc;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	bool retried = false;
 	int may_queue;
+retry:
+	ioc = current->io_context;
 
 	if (unlikely(blk_queue_dead(q)))
 		return NULL;
@@ -779,7 +782,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC, q->node);
+			/*
+			 * We want ioc to record batching state.  If it's
+			 * not already there, creating a new one requires
+			 * dropping queue_lock, which in turn requires
+			 * retesting conditions to avoid queue hang.
+			 */
+			if (!ioc && !retried) {
+				spin_unlock_irq(q->queue_lock);
+				create_io_context(current, gfp_mask, q->node);
+				spin_lock_irq(q->queue_lock);
+				retried = true;
+				goto retry;
+			}
+
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
@@ -887,7 +903,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
-		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
 		if (unlikely(blk_queue_dead(q)))
@@ -907,8 +922,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		 * up to a big batch of them for a small period time.
 		 * See ioc_batching, ioc_set_batching
 		 */
-		ioc = current_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, ioc);
+		create_io_context(current, GFP_NOIO, q->node);
+		ioc_set_batching(q, current->io_context);
 
 		spin_lock_irq(q->queue_lock);
 		finish_wait(&rl->wait[is_sync], &wait);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index fb23965..e23c797 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -205,16 +205,15 @@ void exit_io_context(struct task_struct *task)
 	put_io_context(ioc, NULL);
 }
 
-static struct io_context *create_task_io_context(struct task_struct *task,
-						 gfp_t gfp_flags, int node,
-						 bool take_ref)
+void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
+				int node)
 {
 	struct io_context *ioc;
 
 	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
 				    node);
 	if (unlikely(!ioc))
-		return NULL;
+		return;
 
 	/* initialize */
 	atomic_long_set(&ioc->refcount, 1);
@@ -226,42 +225,13 @@ static struct io_context *create_task_io_context(struct task_struct *task,
 
 	/* try to install, somebody might already have beaten us to it */
 	task_lock(task);
-
-	if (!task->io_context && !(task->flags & PF_EXITING)) {
+	if (!task->io_context && !(task->flags & PF_EXITING))
 		task->io_context = ioc;
-	} else {
+	else
 		kmem_cache_free(iocontext_cachep, ioc);
-		ioc = task->io_context;
-	}
-
-	if (ioc && take_ref)
-		get_io_context(ioc);
-
 	task_unlock(task);
-	return ioc;
 }
-
-/**
- * current_io_context - get io_context of %current
- * @gfp_flags: allocation flags, used if allocation is necessary
- * @node: allocation node, used if allocation is necessary
- *
- * Return io_context of %current.  If it doesn't exist, it is created with
- * @gfp_flags and @node.  The returned io_context does NOT have its
- * reference count incremented.  Because io_context is exited only on task
- * exit, %current can be sure that the returned io_context is valid and
- * alive as long as it is executing.
- */
-struct io_context *current_io_context(gfp_t gfp_flags, int node)
-{
-	might_sleep_if(gfp_flags & __GFP_WAIT);
-
-	if (current->io_context)
-		return current->io_context;
-
-	return create_task_io_context(current, gfp_flags, node, false);
-}
-EXPORT_SYMBOL(current_io_context);
+EXPORT_SYMBOL(create_io_context_slowpath);
 
 /**
  * get_task_io_context - get io_context of a task
@@ -274,7 +244,7 @@ EXPORT_SYMBOL(current_io_context);
  * incremented.
  *
  * This function always goes through task_lock() and it's better to use
- * current_io_context() + get_io_context() for %current.
+ * %current->io_context + get_io_context() for %current.
  */
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node)
@@ -283,16 +253,18 @@ struct io_context *get_task_io_context(struct task_struct *task,
 
 	might_sleep_if(gfp_flags & __GFP_WAIT);
 
-	task_lock(task);
-	ioc = task->io_context;
-	if (likely(ioc)) {
-		get_io_context(ioc);
+	do {
+		task_lock(task);
+		ioc = task->io_context;
+		if (likely(ioc)) {
+			get_io_context(ioc);
+			task_unlock(task);
+			return ioc;
+		}
 		task_unlock(task);
-		return ioc;
-	}
-	task_unlock(task);
+	} while (create_io_context(task, gfp_flags, node));
 
-	return create_task_io_context(task, gfp_flags, node, true);
+	return NULL;
 }
 EXPORT_SYMBOL(get_task_io_context);
 
diff --git a/block/blk.h b/block/blk.h
index 8d42115..5bca266 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -127,9 +127,6 @@ static inline int blk_should_fake_timeout(struct request_queue *q)
 }
 #endif
 
-void get_io_context(struct io_context *ioc);
-struct io_context *current_io_context(gfp_t gfp_flags, int node);
-
 int ll_back_merge_fn(struct request_queue *q, struct request *req,
 		     struct bio *bio);
 int ll_front_merge_fn(struct request_queue *q, struct request *req, 
@@ -198,6 +195,39 @@ static inline int blk_do_io_stat(struct request *rq)
 	        (rq->cmd_flags & REQ_DISCARD));
 }
 
+/*
+ * Internal io_context interface
+ */
+void get_io_context(struct io_context *ioc);
+
+void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
+				int node);
+
+/**
+ * create_io_context - try to create task->io_context
+ * @task: target task
+ * @gfp_mask: allocation mask
+ * @node: allocation node
+ *
+ * If @task->io_context is %NULL, allocate a new io_context and install it.
+ * Returns the current @task->io_context which may be %NULL if allocation
+ * failed.
+ *
+ * Note that this function can't be called with IRQ disabled because
+ * task_lock which protects @task->io_context is IRQ-unsafe.
+ */
+static inline struct io_context *create_io_context(struct task_struct *task,
+						   gfp_t gfp_mask, int node)
+{
+	WARN_ON_ONCE(irqs_disabled());
+	if (unlikely(!task->io_context))
+		create_io_context_slowpath(task, gfp_mask, node);
+	return task->io_context;
+}
+
+/*
+ * Internal throttling interface
+ */
 #ifdef CONFIG_BLK_DEV_THROTTLING
 extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
 extern void blk_throtl_drain(struct request_queue *q);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3b07ce1..5f7e4d1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3012,7 +3012,7 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	/* allocate stuff */
-	ioc = current_io_context(gfp_mask, q->node);
+	ioc = create_io_context(current, gfp_mask, q->node);
 	if (!ioc)
 		goto out;
 
-- 
1.7.3.1


  reply	other threads:[~2011-10-29 23:49 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-10-29 23:49 [PATCHSET block:for-3.2/core] cleanup io_context interface Tejun Heo
2011-10-29 23:49 ` Tejun Heo [this message]
2011-10-29 23:49 ` [PATCH 02/10] block: reorder elevator switch sequence Tejun Heo
2011-10-29 23:49 ` [PATCH 03/10] block: remove elevator_queue->ops Tejun Heo
2011-10-29 23:49 ` [PATCH 04/10] block, cfq: reorganize cfq_io_context into generic and cfq specific parts Tejun Heo
2011-10-29 23:49 ` [PATCH 05/10] block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq Tejun Heo
2011-10-29 23:49 ` [PATCH 06/10] block, cfq: move io_cq lookup to blk-ioc.c Tejun Heo
2011-10-29 23:49 ` [PATCH 07/10] block, cfq: move icq cache management to block core Tejun Heo
2011-10-29 23:49 ` [PATCH 08/10] block, cfq: move io_cq exit/release to blk-ioc.c Tejun Heo
2011-10-29 23:49 ` [PATCH 09/10] block, cfq: restructure io_cq creation path for io_context interface cleanup Tejun Heo
2011-10-29 23:49 ` [PATCH 10/10] block, cfq: move icq creation and rq->elv.icq association to block core Tejun Heo
2011-11-03 15:08 ` [PATCHSET block:for-3.2/core] cleanup io_context interface Tejun Heo
2011-11-04  9:16   ` Jens Axboe
2011-11-04 14:22     ` Tejun Heo
2011-11-04 14:29       ` Jens Axboe
2011-11-14 17:40         ` Tejun Heo
2011-11-14 17:52           ` Jens Axboe
2011-11-14 17:58             ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1319932187-7631-2-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=ctalbott@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rni@google.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.