linux-raid.vger.kernel.org archive mirror
* [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
@ 2012-11-30 10:42 Lukas Czerner
  2012-11-30 10:42 ` [PATCH 2/2 v6] loop: Limit the number of requests in the bio list Lukas Czerner
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Lukas Czerner @ 2012-11-30 10:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-raid, axboe, jmoyer, akpm, Lukas Czerner, Neil Brown,
	David Howells, Ingo Molnar, Peter Zijlstra

New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
moves the private wait_event_lock_irq() macro from MD to the regular wait
includes, introduces the new macro wait_event_lock_irq_cmd() to replace
the old, ugly convention of passing an empty cmd parameter, and converts
MD to the new macros. It also introduces the _interruptible_ variant.

The new interface is useful when one has a special lock protecting the
data structures used in the condition, or when one also needs to invoke
"cmd" before putting the task to sleep.

All new macros are expected to be called with the lock taken. The lock
is released before sleeping and reacquired afterwards. We leave the
macro with the lock held.
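
As a purely illustrative sketch (not part of this patch; my_lock, my_wq,
my_cond and flush_something() are made-up names), a caller of the _cmd
variant would look roughly like this:

	static DEFINE_SPINLOCK(my_lock);
	static DECLARE_WAIT_QUEUE_HEAD(my_wq);
	static bool my_cond;

	static void wait_for_my_cond(void)
	{
		spin_lock_irq(&my_lock);
		/*
		 * my_lock is dropped around flush_something() and
		 * schedule(), and is held again when the macro returns
		 * with my_cond true.
		 */
		wait_event_lock_irq_cmd(my_wq, my_cond, my_lock,
					flush_something());
		spin_unlock_irq(&my_lock);
	}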

Note to DM: IMO this should also fix a theoretical race on the waitqueue
when wait_event_lock_irq() and wait_event() are used simultaneously,
because of the lack of locking around setting the current task state and
removing the task from the wait queue.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Neil Brown <neilb@suse.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
v2: add interruptible variant, swap cmd and schedule, use prepare_to_wait
v3: swap cmd and schedule so we call cmd before schedule

 drivers/md/md.c      |    2 +-
 drivers/md/md.h      |   26 --------
 drivers/md/raid1.c   |   15 ++---
 drivers/md/raid10.c  |   15 ++---
 drivers/md/raid5.c   |   12 ++--
 include/linux/wait.h |  164 ++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 184 insertions(+), 50 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6120071..880ff5b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
 	spin_lock_irq(&mddev->write_lock);
 	wait_event_lock_irq(mddev->sb_wait,
 			    !mddev->flush_bio,
-			    mddev->write_lock, /*nothing*/);
+			    mddev->write_lock);
 	mddev->flush_bio = bio;
 	spin_unlock_irq(&mddev->write_lock);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index af443ab..1e2fc3d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -551,32 +551,6 @@ struct md_thread {
 
 #define THREAD_WAKEUP  0
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
-do {									\
-	wait_queue_t __wait;						\
-	init_waitqueue_entry(&__wait, current);				\
-									\
-	add_wait_queue(&wq, &__wait);					\
-	for (;;) {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	current->state = TASK_RUNNING;					\
-	remove_wait_queue(&wq, &__wait);				\
-} while (0)
-
-#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
-do {									\
-	if (condition)	 						\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
-} while (0)
-
 static inline void safe_put_page(struct page *p)
 {
 	if (p) put_page(p);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 636bae0..f264a62 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
 
 	/* Wait until no block IO is waiting */
 	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(struct r1conf *conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0d5d0ff..bd971d1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 
 	spin_unlock_irq(&conf->resync_lock);
 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a450268..6ff30cb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0 || noquiesce,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!conf->inactive_blocked)
@@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 						    (atomic_read(&conf->active_stripes)
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock,
-						    );
+						    conf->device_lock);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
-				    conf->device_lock,
-				    );
+				    conf->device_lock);
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
 		atomic_set(&nsh->count, 1);
@@ -4003,7 +4001,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
@@ -6095,7 +6093,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		conf->quiesce = 1;
 		spin_unlock_irq(&conf->device_lock);
 		/* allow reshape to continue */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 168dfe1..7cb64d4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -550,6 +550,170 @@ do {									\
 	__ret;								\
 })
 
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_lock_irq_cmd - sleep until a condition gets true. The
+ *			     condition is checked under the lock. This
+ *			     is expected to be called with the lock
+ *			     taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd
+ *	  and schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ */
+#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+} while (0)
+
+/**
+ * wait_event_lock_irq - sleep until a condition gets true. The
+ *			 condition is checked under the lock. This
+ *			 is expected to be called with the lock
+ *			 taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ */
+#define wait_event_lock_irq(wq, condition, lock)			\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, );			\
+} while (0)
+
+
+#define __wait_event_interruptible_lock_irq(wq, condition,		\
+					    lock, ret, cmd)		\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected to
+ *		be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ *	  schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, cmd);	\
+	__ret;								\
+})
+
+/**
+ * wait_event_interruptible_lock_irq - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected
+ *		to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, );	\
+	__ret;								\
+})
+
+
 /*
  * These are the old interfaces to sleep waiting for an event.
  * They are racy.  DO NOT use them, use the wait_event* interfaces above.
-- 
1.7.7.6



* [PATCH 2/2 v6] loop: Limit the number of requests in the bio list
  2012-11-30 10:42 [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface Lukas Czerner
@ 2012-11-30 10:42 ` Lukas Czerner
  2012-11-30 13:57   ` Jeff Moyer
  2012-11-30 10:48 ` [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface Andrew Morton
  2012-11-30 21:43 ` Andrew Morton
  2 siblings, 1 reply; 8+ messages in thread
From: Lukas Czerner @ 2012-11-30 10:42 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-raid, axboe, jmoyer, akpm, Lukas Czerner

Currently there is no limit on the number of requests in the loop bio
list. This can lead to nasty situations where the caller spawns tons of
bio requests, consuming a huge amount of memory. This is even more
obvious with discard, where blkdev_issue_discard() will submit all bios
for the range and wait for them to finish afterwards. On really big loop
devices with a slow backing file system this can lead to an OOM
situation, as reported by Dave Chinner.

With this patch we wait in loop_make_request() if the number of bios in
the loop bio list would exceed 'nr_congestion_on'. We wake up the
waiting process as we process bios from the list. Some threshold
hysteresis is in place to avoid high-frequency oscillation.
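
For clarity, the two halves of the scheme side by side (condensed from
the hunks below, using the same field names; this is a summary, not
additional code):

	/*
	 * Submission path (loop_make_request), lo->lo_lock held: once
	 * the backlog reaches nr_congestion_on, sleep until it drains
	 * below nr_congestion_off.
	 */
	if (lo->lo_bio_count >= q->nr_congestion_on)
		wait_event_lock_irq(lo->lo_req_wait,
				    lo->lo_bio_count < q->nr_congestion_off,
				    lo->lo_lock);

	/*
	 * Loop thread (loop_thread), lo->lo_lock held after popping a
	 * bio: wake submitters only once the backlog has dropped below
	 * the lower threshold, which provides the hysteresis.
	 */
	if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
		wake_up(&lo->lo_req_wait);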

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reported-by: Dave Chinner <dchinner@redhat.com>
---
v2: add threshold hysteresis
v3: Wait uninterruptible, use nr_congestion_off/on
v4: use wait_event_lock_irq()
v5: no change
v6: no change

 drivers/block/loop.c |   10 ++++++++++
 include/linux/loop.h |    3 +++
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 54046e5..ae12512 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -463,6 +463,7 @@ out:
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
+	lo->lo_bio_count++;
 	bio_list_add(&lo->lo_bio_list, bio);
 }
 
@@ -471,6 +472,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
+	lo->lo_bio_count--;
 	return bio_list_pop(&lo->lo_bio_list);
 }
 
@@ -489,6 +491,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
 		goto out;
 	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 		goto out;
+	if (lo->lo_bio_count >= q->nr_congestion_on)
+		wait_event_lock_irq(lo->lo_req_wait,
+				    lo->lo_bio_count < q->nr_congestion_off,
+				    lo->lo_lock);
 	loop_add_bio(lo, old_bio);
 	wake_up(&lo->lo_event);
 	spin_unlock_irq(&lo->lo_lock);
@@ -546,6 +552,8 @@ static int loop_thread(void *data)
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
+		if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
+			wake_up(&lo->lo_req_wait);
 		spin_unlock_irq(&lo->lo_lock);
 
 		BUG_ON(!bio);
@@ -873,6 +881,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->transfer = transfer_none;
 	lo->ioctl = NULL;
 	lo->lo_sizelimit = 0;
+	lo->lo_bio_count = 0;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -1673,6 +1682,7 @@ static int loop_add(struct loop_device **l, int i)
 	lo->lo_number		= i;
 	lo->lo_thread		= NULL;
 	init_waitqueue_head(&lo->lo_event);
+	init_waitqueue_head(&lo->lo_req_wait);
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
 	disk->first_minor	= i << part_shift;
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 6492181..460b60f 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -53,10 +53,13 @@ struct loop_device {
 
 	spinlock_t		lo_lock;
 	struct bio_list		lo_bio_list;
+	unsigned int		lo_bio_count;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
 	wait_queue_head_t	lo_event;
+	/* wait queue for incoming requests */
+	wait_queue_head_t	lo_req_wait;
 
 	struct request_queue	*lo_queue;
 	struct gendisk		*lo_disk;
-- 
1.7.7.6



* Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
  2012-11-30 10:42 [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface Lukas Czerner
  2012-11-30 10:42 ` [PATCH 2/2 v6] loop: Limit the number of requests in the bio list Lukas Czerner
@ 2012-11-30 10:48 ` Andrew Morton
  2012-11-30 11:36   ` Lukáš Czerner
  2012-11-30 21:43 ` Andrew Morton
  2 siblings, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2012-11-30 10:48 UTC (permalink / raw)
  To: Lukas Czerner
  Cc: linux-kernel, linux-raid, axboe, jmoyer, Neil Brown,
	David Howells, Ingo Molnar, Peter Zijlstra

On Fri, 30 Nov 2012 11:42:40 +0100 Lukas Czerner <lczerner@redhat.com> wrote:

> New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
> moves the private wait_event_lock_irq() macro from MD to the regular wait
> includes, introduces the new macro wait_event_lock_irq_cmd() to replace
> the old, ugly convention of passing an empty cmd parameter, and converts
> MD to the new macros. It also introduces the _interruptible_ variant.
>
> The new interface is useful when one has a special lock protecting the
> data structures used in the condition, or when one also needs to invoke
> "cmd" before putting the task to sleep.
>
> All new macros are expected to be called with the lock taken. The lock
> is released before sleeping and reacquired afterwards. We leave the
> macro with the lock held.
>
> Note to DM: IMO this should also fix a theoretical race on the waitqueue
> when wait_event_lock_irq() and wait_event() are used simultaneously,
> because of the lack of locking around setting the current task state and
> removing the task from the wait queue.

Does this fix the sparse warning which Fengguang just sent us?


* Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
  2012-11-30 10:48 ` [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface Andrew Morton
@ 2012-11-30 11:36   ` Lukáš Czerner
  2012-11-30 20:13     ` Andrew Morton
  0 siblings, 1 reply; 8+ messages in thread
From: Lukáš Czerner @ 2012-11-30 11:36 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Lukas Czerner, linux-kernel, linux-raid, axboe, jmoyer,
	Neil Brown

On Fri, 30 Nov 2012, Andrew Morton wrote:

> Date: Fri, 30 Nov 2012 02:48:42 -0800
> From: Andrew Morton <akpm@linux-foundation.org>
> To: Lukas Czerner <lczerner@redhat.com>
> Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org, axboe@kernel.dk,
>     jmoyer@redhat.com, Neil Brown <neilb@suse.de>,
>     David Howells <dhowells@redhat.com>, Ingo Molnar <mingo@elte.hu>,
>     Peter Zijlstra <a.p.zijlstra@chello.nl>
> Subject: Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
> 
> On Fri, 30 Nov 2012 11:42:40 +0100 Lukas Czerner <lczerner@redhat.com> wrote:
> 
> > New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
> > moves the private wait_event_lock_irq() macro from MD to the regular wait
> > includes, introduces the new macro wait_event_lock_irq_cmd() to replace
> > the old, ugly convention of passing an empty cmd parameter, and converts
> > MD to the new macros. It also introduces the _interruptible_ variant.
> >
> > The new interface is useful when one has a special lock protecting the
> > data structures used in the condition, or when one also needs to invoke
> > "cmd" before putting the task to sleep.
> >
> > All new macros are expected to be called with the lock taken. The lock
> > is released before sleeping and reacquired afterwards. We leave the
> > macro with the lock held.
> >
> > Note to DM: IMO this should also fix a theoretical race on the waitqueue
> > when wait_event_lock_irq() and wait_event() are used simultaneously,
> > because of the lack of locking around setting the current task state and
> > removing the task from the wait queue.
> 
> Does this fix the sparse warning which Fengguang just sent us?

Which report from Fengguang do you have in mind? I do not see any on
linux-kernel today.

/me going to see what sparse reports

-Lukas


* Re: [PATCH 2/2 v6] loop: Limit the number of requests in the bio list
  2012-11-30 10:42 ` [PATCH 2/2 v6] loop: Limit the number of requests in the bio list Lukas Czerner
@ 2012-11-30 13:57   ` Jeff Moyer
  0 siblings, 0 replies; 8+ messages in thread
From: Jeff Moyer @ 2012-11-30 13:57 UTC (permalink / raw)
  To: Lukas Czerner; +Cc: linux-kernel, linux-raid, axboe, akpm

Lukas Czerner <lczerner@redhat.com> writes:

> Currently there is no limit on the number of requests in the loop bio
> list. This can lead to nasty situations where the caller spawns tons of
> bio requests, consuming a huge amount of memory. This is even more
> obvious with discard, where blkdev_issue_discard() will submit all bios
> for the range and wait for them to finish afterwards. On really big loop
> devices with a slow backing file system this can lead to an OOM
> situation, as reported by Dave Chinner.
>
> With this patch we wait in loop_make_request() if the number of bios in
> the loop bio list would exceed 'nr_congestion_on'. We wake up the
> waiting process as we process bios from the list. Some threshold
> hysteresis is in place to avoid high-frequency oscillation.
>
> Signed-off-by: Lukas Czerner <lczerner@redhat.com>
> Reported-by: Dave Chinner <dchinner@redhat.com>

Lukas, please carry forward acked-by tags when there is no change so I
don't have to keep acking.

Thanks,
Jeff

Acked-by: Jeff Moyer <jmoyer@redhat.com>


* Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
  2012-11-30 11:36   ` Lukáš Czerner
@ 2012-11-30 20:13     ` Andrew Morton
  2012-12-03  9:21       ` Lukáš Czerner
  0 siblings, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2012-11-30 20:13 UTC (permalink / raw)
  To: Lukáš Czerner
  Cc: linux-kernel, linux-raid, axboe, jmoyer, Neil Brown

On Fri, 30 Nov 2012 12:36:35 +0100 (CET)
Lukáš Czerner <lczerner@redhat.com> wrote:

> On Fri, 30 Nov 2012, Andrew Morton wrote:
> 
> > Date: Fri, 30 Nov 2012 02:48:42 -0800
> > From: Andrew Morton <akpm@linux-foundation.org>
> > To: Lukas Czerner <lczerner@redhat.com>
> > Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org, axboe@kernel.dk,
> >     jmoyer@redhat.com, Neil Brown <neilb@suse.de>,
> >     David Howells <dhowells@redhat.com>, Ingo Molnar <mingo@elte.hu>,
> >     Peter Zijlstra <a.p.zijlstra@chello.nl>
> > Subject: Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
> > 
> > On Fri, 30 Nov 2012 11:42:40 +0100 Lukas Czerner <lczerner@redhat.com> wrote:
> > 
> > > New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
> > > moves the private wait_event_lock_irq() macro from MD to the regular wait
> > > includes, introduces the new macro wait_event_lock_irq_cmd() to replace
> > > the old, ugly convention of passing an empty cmd parameter, and converts
> > > MD to the new macros. It also introduces the _interruptible_ variant.
> > >
> > > The new interface is useful when one has a special lock protecting the
> > > data structures used in the condition, or when one also needs to invoke
> > > "cmd" before putting the task to sleep.
> > >
> > > All new macros are expected to be called with the lock taken. The lock
> > > is released before sleeping and reacquired afterwards. We leave the
> > > macro with the lock held.
> > >
> > > Note to DM: IMO this should also fix a theoretical race on the waitqueue
> > > when wait_event_lock_irq() and wait_event() are used simultaneously,
> > > because of the lack of locking around setting the current task state and
> > > removing the task from the wait queue.
> > 
> > Does this fix the sparse warning which Fengguang just sent us?
> 
> Which report from Fengguang do you have in mind? I do not see any on
> linux-kernel today.
>
> /me going to see what sparse reports
> 

On Fri, 30 Nov 2012 16:30:24 +0800
kbuild test robot <fengguang.wu@intel.com> wrote:

> tree:   git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git akpm
> head:   cfb65dadcd079ad4547407a1584bc6b96bd48bb3
> commit: 2b29cdb6f98c86a1da4ec5335d6247392b7c6551 [35/476] wait: add wait_event_lock_irq() interface
> 
> 
> sparse warnings:
> 
> + drivers/block/drbd/drbd_int.h:2339:9: sparse: preprocessor token __wait_event_lock_irq redefined
> include/linux/wait.h:554:9: this was the original definition
> + drivers/block/drbd/drbd_int.h:2358:9: sparse: preprocessor token wait_event_lock_irq redefined
> include/linux/wait.h:621:9: this was the original definition
> drivers/block/drbd/drbd_interval.h:12:63: sparse: dubious one-bit signed bitfield
> drivers/block/drbd/drbd_interval.h:13:22: sparse: dubious one-bit signed bitfield
> drivers/block/drbd/drbd_int.h:903:39: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1123:69: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1124:70: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1125:59: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1126:63: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1127:60: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1128:71: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1129:65: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1130:66: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1335:74: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1336:50: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1338:51: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1339:58: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1340:54: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1341:62: sparse: attribute 'require_context': unknown attribute
> drivers/block/drbd/drbd_int.h:1435:92: sparse: attribute 'require_context': unknown attribute


* Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
  2012-11-30 10:42 [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface Lukas Czerner
  2012-11-30 10:42 ` [PATCH 2/2 v6] loop: Limit the number of requests in the bio list Lukas Czerner
  2012-11-30 10:48 ` [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface Andrew Morton
@ 2012-11-30 21:43 ` Andrew Morton
  2 siblings, 0 replies; 8+ messages in thread
From: Andrew Morton @ 2012-11-30 21:43 UTC (permalink / raw)
  To: Lukas Czerner
  Cc: linux-kernel, linux-raid, axboe, jmoyer, Neil Brown,
	David Howells, Ingo Molnar, Peter Zijlstra

On Fri, 30 Nov 2012 11:42:40 +0100
Lukas Czerner <lczerner@redhat.com> wrote:

> New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
> moves the private wait_event_lock_irq() macro from MD to the regular wait
> includes, introduces the new macro wait_event_lock_irq_cmd() to replace
> the old, ugly convention of passing an empty cmd parameter, and converts
> MD to the new macros. It also introduces the _interruptible_ variant.
>
> The new interface is useful when one has a special lock protecting the
> data structures used in the condition, or when one also needs to invoke
> "cmd" before putting the task to sleep.
>
> All new macros are expected to be called with the lock taken. The lock
> is released before sleeping and reacquired afterwards. We leave the
> macro with the lock held.
>
> Note to DM: IMO this should also fix a theoretical race on the waitqueue
> when wait_event_lock_irq() and wait_event() are used simultaneously,
> because of the lack of locking around setting the current task state and
> removing the task from the wait queue.
> 
> Signed-off-by: Lukas Czerner <lczerner@redhat.com>
> Cc: Neil Brown <neilb@suse.de>
> Cc: David Howells <dhowells@redhat.com>
> Cc: Ingo Molnar <mingo@elte.hu>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> v2: add interruptible variant, swap cmd and schedule, use prepare_to_wait
> v3: swap cmd and schedule so we call cmd before schedule

Your v3 patch reverts
http://ozlabs.org/~akpm/mmots/broken-out/wait-add-wait_event_lock_irq-interface-fix.patch.

Here's what I've queued in -mm:

--- a/include/linux/wait.h~wait-add-wait_event_lock_irq-interface-v3
+++ a/include/linux/wait.h
@@ -555,14 +555,13 @@ do {									\
 do {									\
 	DEFINE_WAIT(__wait);						\
 									\
-	cmd;								\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
 		spin_unlock_irq(&lock);					\
-		schedule();						\
 		cmd;							\
+		schedule();						\
 		spin_lock_irq(&lock);					\
 	}								\
 	finish_wait(&wq, &__wait);					\
@@ -575,8 +574,8 @@ do {									\
  *			     taken.
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
- * @lock: a locked spinlock_t, which will be released before schedule()
- *	  and cmd reacquired afterwards.
+ * @lock: a locked spinlock_t, which will be released before cmd
+ *	  and schedule() and reacquired afterwards.
  * @cmd: a command which is invoked outside the critical section before
  *	 sleep
  *
@@ -588,7 +587,7 @@ do {									\
  * change the result of the wait condition.
  *
  * This is supposed to be called while holding the lock. The lock is
- * dropped before going to sleep and invoking the cmd and is reacquired
+ * dropped before invoking the cmd and going to sleep and is reacquired
  * afterwards.
  */
 #define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
@@ -631,7 +630,6 @@ do {									\
 do {									\
 	DEFINE_WAIT(__wait);						\
 									\
-	cmd;								\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
@@ -641,8 +639,8 @@ do {									\
 			break;						\
 		}							\
 		spin_unlock_irq(&lock);					\
-		schedule();						\
 		cmd;							\
+		schedule();						\
 		spin_lock_irq(&lock);					\
 	}								\
 	finish_wait(&wq, &__wait);					\
@@ -654,8 +652,8 @@ do {									\
  *		be called with the lock taken.
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
- * @lock: a locked spinlock_t, which will be released before schedule()
- *	  and cmd reacquired afterwards.
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ *	  schedule() and reacquired afterwards.
  * @cmd: a command which is invoked outside the critical section before
  *	 sleep
  *
@@ -667,7 +665,7 @@ do {									\
  * change the result of the wait condition.
  *
  * This is supposed to be called while holding the lock. The lock is
- * dropped before going to sleep and invoking the cmd and is reacquired
+ * dropped before invoking the cmd and going to sleep and is reacquired
  * afterwards.
  *
  * The macro will return -ERESTARTSYS if it was interrupted by a signal
_


* Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
  2012-11-30 20:13     ` Andrew Morton
@ 2012-12-03  9:21       ` Lukáš Czerner
  0 siblings, 0 replies; 8+ messages in thread
From: Lukáš Czerner @ 2012-12-03  9:21 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Lukáš Czerner, linux-kernel, linux-raid, axboe, jmoyer,
	Neil Brown


On Fri, 30 Nov 2012, Andrew Morton wrote:

> Date: Fri, 30 Nov 2012 12:13:08 -0800
> From: Andrew Morton <akpm@linux-foundation.org>
> To: Lukáš Czerner <lczerner@redhat.com>
> Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org, axboe@kernel.dk,
>     jmoyer@redhat.com, Neil Brown <neilb@suse.de>
> Subject: Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
> 
> On Fri, 30 Nov 2012 12:36:35 +0100 (CET)
> Lukáš Czerner <lczerner@redhat.com> wrote:
> 
> > On Fri, 30 Nov 2012, Andrew Morton wrote:
> > 
> > > Date: Fri, 30 Nov 2012 02:48:42 -0800
> > > From: Andrew Morton <akpm@linux-foundation.org>
> > > To: Lukas Czerner <lczerner@redhat.com>
> > > Cc: linux-kernel@vger.kernel.org, linux-raid@vger.kernel.org, axboe@kernel.dk,
> > >     jmoyer@redhat.com, Neil Brown <neilb@suse.de>,
> > >     David Howells <dhowells@redhat.com>, Ingo Molnar <mingo@elte.hu>,
> > >     Peter Zijlstra <a.p.zijlstra@chello.nl>
> > > Subject: Re: [PATCH 1/2 v3] wait: add wait_event_lock_irq() interface
> > > 
> > > On Fri, 30 Nov 2012 11:42:40 +0100 Lukas Czerner <lczerner@redhat.com> wrote:
> > > 
> > > > New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
> > > > moves the private wait_event_lock_irq() macro from MD to the regular wait
> > > > includes, introduces the new macro wait_event_lock_irq_cmd() to replace
> > > > the old, ugly convention of passing an empty cmd parameter, and converts
> > > > MD to the new macros. It also introduces the _interruptible_ variant.
> > > >
> > > > The new interface is useful when one has a special lock protecting the
> > > > data structures used in the condition, or when one also needs to invoke
> > > > "cmd" before putting the task to sleep.
> > > >
> > > > All new macros are expected to be called with the lock taken. The lock
> > > > is released before sleeping and reacquired afterwards. We leave the
> > > > macro with the lock held.
> > > >
> > > > Note to DM: IMO this should also fix a theoretical race on the waitqueue
> > > > when wait_event_lock_irq() and wait_event() are used simultaneously,
> > > > because of the lack of locking around setting the current task state and
> > > > removing the task from the wait queue.
> > > 
> > > Does this fix the sparse warning which Fengguang just sent us?
> > 
> > Which report from Fengguang do you have in mind? I do not see any on
> > linux-kernel today.
> >
> > /me going to see what sparse reports
> > 
> 
> On Fri, 30 Nov 2012 16:30:24 +0800
> kbuild test robot <fengguang.wu@intel.com> wrote:
> 
> > tree:   git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git akpm
> > head:   cfb65dadcd079ad4547407a1584bc6b96bd48bb3
> > commit: 2b29cdb6f98c86a1da4ec5335d6247392b7c6551 [35/476] wait: add wait_event_lock_irq() interface
> > 
> > 
> > sparse warnings:
> > 
> > + drivers/block/drbd/drbd_int.h:2339:9: sparse: preprocessor token __wait_event_lock_irq redefined
> > include/linux/wait.h:554:9: this was the original definition
> > + drivers/block/drbd/drbd_int.h:2358:9: sparse: preprocessor token wait_event_lock_irq redefined
> > include/linux/wait.h:621:9: this was the original definition
> > drivers/block/drbd/drbd_interval.h:12:63: sparse: dubious one-bit signed bitfield
> > drivers/block/drbd/drbd_interval.h:13:22: sparse: dubious one-bit signed bitfield
> > drivers/block/drbd/drbd_int.h:903:39: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1123:69: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1124:70: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1125:59: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1126:63: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1127:60: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1128:71: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1129:65: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1130:66: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1335:74: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1336:50: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1338:51: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1339:58: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1340:54: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1341:62: sparse: attribute 'require_context': unknown attribute
> > drivers/block/drbd/drbd_int.h:1435:92: sparse: attribute 'require_context': unknown attribute
> 

I believe that Lars has already sent a patch for that. So no,

'[PATCH 1/2 v3] wait: add wait_event_lock_irq() interface'

does not fix the issue, but another patch should.

http://www.spinics.net/lists/linux-next/msg23042.html

Thanks!
-Lukas

