From: Tejun Heo <tj@kernel.org>
To: Shaohua Li <shaohua.li@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Jens Axboe <axboe@kernel.dk>, Vivek Goyal <vgoyal@redhat.com>,
lkml <linux-kernel@vger.kernel.org>,
Knut Petersen <Knut_Petersen@t-online.de>,
mroos@linux.ee
Subject: Re: [PATCH] block: strip out locking optimization in put_io_context()
Date: Wed, 8 Feb 2012 08:29:25 -0800 [thread overview]
Message-ID: <20120208162925.GA19392@google.com> (raw)
In-Reply-To: <CANejiEXqLxJ=mhDYA04g4ZHo3CiXctCUVnHu1--yp0GYgPxOKQ@mail.gmail.com>
Hello, Shaohua.
On Wed, Feb 08, 2012 at 04:29:53PM +0800, Shaohua Li wrote:
> > the test adds mem=4G in a 2 sockets 16 CPU machine.
> > just make several copy of kernel source in tmpfs (so there is swap
> > depending on your
> > memory size) and run kernel build in the kernel source in the meaning time.
> > there is only one swap device which is a rotating disk.
> >
> > I'll test both patches soon.
>
> Tried all the options, the regression still exists. Any new idea?
> I'll spend some time on it if I can get anything
Can you please try the following one? Thanks a lot!
---
block/blk-cgroup.c | 2
block/blk-core.c | 2
block/blk-ioc.c | 99 +++++++++++++---------------------------------
block/cfq-iosched.c | 2
fs/ioprio.c | 2
include/linux/iocontext.h | 8 +--
kernel/fork.c | 2
7 files changed, 37 insertions(+), 80 deletions(-)
Index: work/block/blk-ioc.c
===================================================================
--- work.orig/block/blk-ioc.c
+++ work/block/blk-ioc.c
@@ -8,9 +8,12 @@
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/slab.h>
+#include <linux/rwlock.h>
#include "blk.h"
+static DEFINE_RWLOCK(ioc_clear_rwlock);
+
/*
* For io context allocations
*/
@@ -45,7 +48,7 @@ static void ioc_exit_icq(struct io_cq *i
struct request_queue *q = icq->q;
struct elevator_type *et = q->elevator->type;
- lockdep_assert_held(&ioc->lock);
+ //lockdep_assert_held(&ioc->lock);
lockdep_assert_held(q->queue_lock);
radix_tree_delete(&ioc->icq_tree, icq->q->id);
@@ -71,63 +74,6 @@ static void ioc_exit_icq(struct io_cq *i
call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}
-/*
- * Slow path for ioc release in put_io_context(). Performs double-lock
- * dancing to unlink all icq's and then frees ioc.
- */
-static void ioc_release_fn(struct work_struct *work)
-{
- struct io_context *ioc = container_of(work, struct io_context,
- release_work);
- struct request_queue *last_q = NULL;
-
- spin_lock_irq(&ioc->lock);
-
- while (!hlist_empty(&ioc->icq_list)) {
- struct io_cq *icq = hlist_entry(ioc->icq_list.first,
- struct io_cq, ioc_node);
- struct request_queue *this_q = icq->q;
-
- if (this_q != last_q) {
- /*
- * Need to switch to @this_q. Once we release
- * @ioc->lock, it can go away along with @cic.
- * Hold on to it.
- */
- __blk_get_queue(this_q);
-
- /*
- * blk_put_queue() might sleep thanks to kobject
- * idiocy. Always release both locks, put and
- * restart.
- */
- if (last_q) {
- spin_unlock(last_q->queue_lock);
- spin_unlock_irq(&ioc->lock);
- blk_put_queue(last_q);
- } else {
- spin_unlock_irq(&ioc->lock);
- }
-
- last_q = this_q;
- spin_lock_irq(this_q->queue_lock);
- spin_lock(&ioc->lock);
- continue;
- }
- ioc_exit_icq(icq);
- }
-
- if (last_q) {
- spin_unlock(last_q->queue_lock);
- spin_unlock_irq(&ioc->lock);
- blk_put_queue(last_q);
- } else {
- spin_unlock_irq(&ioc->lock);
- }
-
- kmem_cache_free(iocontext_cachep, ioc);
-}
-
/**
* put_io_context - put a reference of io_context
* @ioc: io_context to put
@@ -135,7 +81,7 @@ static void ioc_release_fn(struct work_s
* Decrement reference count of @ioc and release it if the count reaches
* zero.
*/
-void put_io_context(struct io_context *ioc)
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
unsigned long flags;
@@ -144,16 +90,26 @@ void put_io_context(struct io_context *i
BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
- /*
- * Releasing ioc requires reverse order double locking and we may
- * already be holding a queue_lock. Do it asynchronously from wq.
- */
- if (atomic_long_dec_and_test(&ioc->refcount)) {
- spin_lock_irqsave(&ioc->lock, flags);
- if (!hlist_empty(&ioc->icq_list))
- schedule_work(&ioc->release_work);
- spin_unlock_irqrestore(&ioc->lock, flags);
+ if (!atomic_long_dec_and_test(&ioc->refcount))
+ return;
+
+ read_lock_irqsave(&ioc_clear_rwlock, flags);
+
+ while (!hlist_empty(&ioc->icq_list)) {
+ struct io_cq *icq = hlist_entry(ioc->icq_list.first,
+ struct io_cq, ioc_node);
+ struct request_queue *q = icq->q;
+
+ if (q != locked_q)
+ spin_lock_nested(q->queue_lock, 1);
+
+ ioc_exit_icq(icq);
+
+ if (q != locked_q)
+ spin_unlock(q->queue_lock);
}
+
+ read_unlock_irqrestore(&ioc_clear_rwlock, flags);
}
EXPORT_SYMBOL(put_io_context);
@@ -168,7 +124,7 @@ void exit_io_context(struct task_struct
task_unlock(task);
atomic_dec(&ioc->nr_tasks);
- put_io_context(ioc);
+ put_io_context(ioc, NULL);
}
/**
@@ -181,6 +137,8 @@ void ioc_clear_queue(struct request_queu
{
lockdep_assert_held(q->queue_lock);
+ write_lock(&ioc_clear_rwlock);
+
while (!list_empty(&q->icq_list)) {
struct io_cq *icq = list_entry(q->icq_list.next,
struct io_cq, q_node);
@@ -190,6 +148,8 @@ void ioc_clear_queue(struct request_queu
ioc_exit_icq(icq);
spin_unlock(&ioc->lock);
}
+
+ write_unlock(&ioc_clear_rwlock);
}
void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
@@ -208,7 +168,6 @@ void create_io_context_slowpath(struct t
spin_lock_init(&ioc->lock);
INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ioc->icq_list);
- INIT_WORK(&ioc->release_work, ioc_release_fn);
/*
* Try to install. ioc shouldn't be installed if someone else
Index: work/include/linux/iocontext.h
===================================================================
--- work.orig/include/linux/iocontext.h
+++ work/include/linux/iocontext.h
@@ -3,7 +3,6 @@
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
-#include <linux/workqueue.h>
enum {
ICQ_IOPRIO_CHANGED,
@@ -113,8 +112,6 @@ struct io_context {
struct radix_tree_root icq_tree;
struct io_cq __rcu *icq_hint;
struct hlist_head icq_list;
-
- struct work_struct release_work;
};
static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -133,7 +130,7 @@ static inline struct io_context *ioc_tas
struct task_struct;
#ifdef CONFIG_BLOCK
-void put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
void exit_io_context(struct task_struct *task);
struct io_context *get_task_io_context(struct task_struct *task,
gfp_t gfp_flags, int node);
@@ -141,7 +138,8 @@ void ioc_ioprio_changed(struct io_contex
void ioc_cgroup_changed(struct io_context *ioc);
#else
struct io_context;
-static inline void put_io_context(struct io_context *ioc) { }
+static inline void put_io_context(struct io_context *ioc,
+ struct request_queue *locked_q) { }
static inline void exit_io_context(struct task_struct *task) { }
#endif
Index: work/block/blk-cgroup.c
===================================================================
--- work.orig/block/blk-cgroup.c
+++ work/block/blk-cgroup.c
@@ -1659,7 +1659,7 @@ static void blkiocg_attach(struct cgroup
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
ioc_cgroup_changed(ioc);
- put_io_context(ioc);
+ put_io_context(ioc, NULL);
}
}
}
Index: work/block/blk-core.c
===================================================================
--- work.orig/block/blk-core.c
+++ work/block/blk-core.c
@@ -642,7 +642,7 @@ static inline void blk_free_request(stru
if (rq->cmd_flags & REQ_ELVPRIV) {
elv_put_request(q, rq);
if (rq->elv.icq)
- put_io_context(rq->elv.icq->ioc);
+ put_io_context(rq->elv.icq->ioc, q);
}
mempool_free(rq, q->rq.rq_pool);
Index: work/block/cfq-iosched.c
===================================================================
--- work.orig/block/cfq-iosched.c
+++ work/block/cfq-iosched.c
@@ -1787,7 +1787,7 @@ __cfq_slice_expired(struct cfq_data *cfq
cfqd->active_queue = NULL;
if (cfqd->active_cic) {
- put_io_context(cfqd->active_cic->icq.ioc);
+ put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
cfqd->active_cic = NULL;
}
}
Index: work/fs/ioprio.c
===================================================================
--- work.orig/fs/ioprio.c
+++ work/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
ioc_ioprio_changed(ioc, ioprio);
- put_io_context(ioc);
+ put_io_context(ioc, NULL);
}
return err;
Index: work/kernel/fork.c
===================================================================
--- work.orig/kernel/fork.c
+++ work/kernel/fork.c
@@ -890,7 +890,7 @@ static int copy_io(unsigned long clone_f
return -ENOMEM;
new_ioc->ioprio = ioc->ioprio;
- put_io_context(new_ioc);
+ put_io_context(new_ioc, NULL);
}
#endif
return 0;
next prev parent reply other threads:[~2012-02-08 16:29 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-06 7:50 [patch]block: fix ioc locking warning Shaohua Li
2012-02-06 7:55 ` Jens Axboe
2012-02-06 15:12 ` Vivek Goyal
2012-02-06 16:09 ` Jens Axboe
2012-02-06 16:37 ` Vivek Goyal
2012-02-06 16:44 ` Tejun Heo
2012-02-06 16:58 ` Linus Torvalds
2012-02-06 17:27 ` Tejun Heo
2012-02-06 20:16 ` Jens Axboe
2012-02-06 21:54 ` [PATCH] block: strip out locking optimization in put_io_context() Tejun Heo
2012-02-07 6:49 ` Jens Axboe
2012-02-07 16:22 ` Tejun Heo
2012-02-07 16:28 ` Jens Axboe
2012-02-07 16:33 ` Linus Torvalds
2012-02-07 16:47 ` Tejun Heo
2012-02-07 17:17 ` Tejun Heo
2012-02-08 0:19 ` Shaohua Li
2012-02-08 8:29 ` Shaohua Li
2012-02-08 16:29 ` Tejun Heo [this message]
2012-02-08 16:34 ` Linus Torvalds
2012-02-08 16:49 ` Tejun Heo
2012-02-08 16:56 ` Tejun Heo
2012-02-08 17:23 ` Tejun Heo
2012-02-09 6:22 ` Shaohua Li
2012-02-09 17:59 ` Tejun Heo
2012-02-09 18:07 ` Linus Torvalds
2012-02-09 19:24 ` Tejun Heo
2012-02-09 23:48 ` Tejun Heo
2012-02-10 5:14 ` Shaohua Li
2012-02-10 8:48 ` Shaohua Li
2012-02-11 2:17 ` Tejun Heo
2012-02-11 11:35 ` Jens Axboe
2012-02-13 1:34 ` Shaohua Li
2012-02-13 20:49 ` Tejun Heo
2012-02-14 2:36 ` Shaohua Li
2012-02-14 16:39 ` Tejun Heo
2012-02-10 3:09 ` Shaohua Li
2012-02-07 23:00 ` [PATCH] block: fix lockdep warning on io_context release put_io_context() Tejun Heo
2012-02-06 20:36 ` [patch]block: fix ioc locking warning Tejun Heo
2012-02-07 0:31 ` Shaohua Li
2012-02-07 0:39 ` Tejun Heo
2012-02-07 0:43 ` Shaohua Li
2012-02-07 0:59 ` Tejun Heo
2012-02-07 1:10 ` Shaohua Li
2012-02-07 1:33 ` Shaohua Li
2012-02-07 5:22 ` Shaohua Li
2012-02-07 22:34 ` Linus Torvalds
2012-02-06 16:22 ` Tejun Heo
2012-02-08 18:07 ` walt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120208162925.GA19392@google.com \
--to=tj@kernel.org \
--cc=Knut_Petersen@t-online.de \
--cc=axboe@kernel.dk \
--cc=linux-kernel@vger.kernel.org \
--cc=mroos@linux.ee \
--cc=shaohua.li@intel.com \
--cc=torvalds@linux-foundation.org \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.