linux-raid.vger.kernel.org archive mirror
* fix use after free in raid5-cache
From: Christoph Hellwig @ 2015-09-02 14:14 UTC
  To: shli, neilb; +Cc: dan.j.williams, linux-raid, Kernel-team

Hi Shaohua, hi Neil,

this series fixes a use after free of the r5l_io_unit structure that I ran
into while testing the caching code.  The real fix is in patch 3; the other
two patches contain the refactoring needed to enable it.
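
To make the failure mode concrete, here is a minimal user-space sketch of the
pattern (plain C with made-up stand-in names, not the raid5-cache code
itself): the old IO_UNIT_STRIPE_END handling could free the just-completed
io_unit while collapsing the stripe_end_ios list, and then still touch it
through wake_up(&io->wait_state).

/*
 * Stand-alone model of the use after free.  compress() plays the role of
 * the old r5l_compress_stripe_end_list(): it keeps the first and last
 * entry of a list and frees everything in between, possibly including
 * the entry the caller still holds a pointer to.
 */
#include <stdio.h>
#include <stdlib.h>

struct io_unit {
	struct io_unit *next;
	int wait_state;				/* stands in for io->wait_state */
};

static void compress(struct io_unit *head)
{
	struct io_unit *cur = head->next;

	while (cur && cur->next) {		/* neither first nor last entry */
		struct io_unit *victim = cur;

		cur = victim->next;
		head->next = cur;
		free(victim);
	}
}

int main(void)
{
	struct io_unit *a = calloc(1, sizeof(*a));
	struct io_unit *b = calloc(1, sizeof(*b));	/* the io_unit that just completed */
	struct io_unit *c = calloc(1, sizeof(*c));

	a->next = b;
	b->next = c;

	/* the old IO_UNIT_STRIPE_END state change effectively did: */
	compress(a);				/* ... which may free b ... */
	printf("%d\n", b->wait_state);		/* ... then wake_up(&b->wait_state): use after free */

	free(a);
	free(c);
	return 0;
}

Patch 3 avoids this by computing the reclaimable space from the current
io_unit, never freeing it while collapsing the list, and doing all of it
under io_list_lock.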



* [PATCH 1/3] raid5-cache: move functionality out of __r5l_set_io_unit_state
From: Christoph Hellwig @ 2015-09-02 14:14 UTC
  To: shli, neilb; +Cc: dan.j.williams, linux-raid, Kernel-team

Keep __r5l_set_io_unit_state as a small wrapper that just sets the
state, and remove r5l_set_io_unit_state entirely after moving the real
functionality into the two callers that need it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/md/raid5-cache.c | 84 ++++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 52feb90..9f984f8 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -213,58 +213,33 @@ static void r5l_compress_stripe_end_list(struct r5l_log *log)
 }
 
 static void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
-static void __r5l_set_io_unit_state(struct r5l_io_unit *io,
-	enum r5l_io_unit_state state)
-{
-	struct r5l_log *log = io->log;
 
+static inline void __r5l_set_io_unit_state(struct r5l_io_unit *io,
+		enum r5l_io_unit_state state)
+{
 	if (WARN_ON(io->state >= state))
 		return;
 	io->state = state;
-	if (state == IO_UNIT_IO_END)
-		r5l_move_io_unit_list(&log->running_ios, &log->io_end_ios,
-			IO_UNIT_IO_END);
-	if (state == IO_UNIT_STRIPE_END) {
-		struct r5l_io_unit *last;
-		sector_t reclaimable_space;
-
-		r5l_move_io_unit_list(&log->io_end_ios, &log->stripe_end_ios,
-			IO_UNIT_STRIPE_END);
-
-		last = list_last_entry(&log->stripe_end_ios,
-				struct r5l_io_unit, log_sibling);
-		reclaimable_space = r5l_ring_distance(log, log->last_checkpoint,
-					last->log_end);
-		if (reclaimable_space >= log->max_free_space)
-			r5l_wake_reclaim(log, 0);
-
-		r5l_compress_stripe_end_list(log);
-	}
-	wake_up(&io->wait_state);
-}
-
-static void r5l_set_io_unit_state(struct r5l_io_unit *io,
-	enum r5l_io_unit_state state)
-{
-	struct r5l_log *log = io->log;
-	unsigned long flags;
-
-	spin_lock_irqsave(&log->io_list_lock, flags);
-	__r5l_set_io_unit_state(io, state);
-	spin_unlock_irqrestore(&log->io_list_lock, flags);
 }
 
 static void r5l_log_endio(struct bio *bio, int error)
 {
 	struct r5l_io_unit *io = bio->bi_private;
 	struct r5l_log *log = io->log;
+	unsigned long flags;
 
 	bio_put(bio);
 
 	if (!atomic_dec_and_test(&io->pending_io))
 		return;
 
-	r5l_set_io_unit_state(io, IO_UNIT_IO_END);
+	spin_lock_irqsave(&log->io_list_lock, flags);
+	__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
+	r5l_move_io_unit_list(&log->running_ios, &log->io_end_ios,
+			IO_UNIT_IO_END);
+	spin_unlock_irqrestore(&log->io_list_lock, flags);
+
+	wake_up(&io->wait_state);
 	md_wakeup_thread(log->rdev->mddev->thread);
 }
 
@@ -273,6 +248,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
 	struct r5l_io_unit *io = log->current_io;
 	struct r5l_meta_block *block;
 	struct bio *bio;
+	unsigned long flags;
 	u32 crc;
 
 	if (!io)
@@ -284,7 +260,10 @@ static void r5l_submit_current_io(struct r5l_log *log)
 	block->checksum = cpu_to_le32(crc);
 
 	log->current_io = NULL;
-	r5l_set_io_unit_state(io, IO_UNIT_IO_START);
+
+	spin_lock_irqsave(&io->log->io_list_lock, flags);
+	__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
+	spin_unlock_irqrestore(&io->log->io_list_lock, flags);
 
 	while ((bio = bio_list_pop(&io->bios))) {
 		/* all IO must start from rdev->data_offset */
@@ -540,18 +519,39 @@ static void r5l_run_no_space_stripes(struct r5l_log *log)
 	spin_unlock(&log->no_space_stripes_lock);
 }
 
+static void __r5l_stripe_write_finished(struct r5l_io_unit *io)
+{
+	struct r5l_log *log = io->log;
+	struct r5l_io_unit *last;
+	sector_t reclaimable_space;
+	unsigned long flags;
+
+	spin_lock_irqsave(&log->io_list_lock, flags);
+	__r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
+	r5l_move_io_unit_list(&log->io_end_ios, &log->stripe_end_ios,
+		IO_UNIT_STRIPE_END);
+	spin_unlock_irqrestore(&log->io_list_lock, flags);
+	wake_up(&io->wait_state);
+
+	last = list_last_entry(&log->stripe_end_ios,
+			struct r5l_io_unit, log_sibling);
+	reclaimable_space = r5l_ring_distance(log, log->last_checkpoint,
+				last->log_end);
+	if (reclaimable_space >= log->max_free_space)
+		r5l_wake_reclaim(log, 0);
+
+	r5l_compress_stripe_end_list(log);
+}
+
 void r5l_stripe_write_finished(struct stripe_head *sh)
 {
 	struct r5l_io_unit *io;
 
-	/* Don't support stripe batch */
 	io = sh->log_io;
-	if (!io)
-		return;
 	sh->log_io = NULL;
 
-	if (atomic_dec_and_test(&io->pending_stripe))
-		r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
+	if (io && atomic_dec_and_test(&io->pending_stripe))
+		__r5l_stripe_write_finished(io);
 }
 
 /*
-- 
1.9.1



* [PATCH 2/3] raid5-cache: remove r5l_move_io_unit_list
From: Christoph Hellwig @ 2015-09-02 14:14 UTC
  To: shli, neilb; +Cc: dan.j.williams, linux-raid, Kernel-team

We only move I/O unit structures from one list to another under the
io_list_lock, so there is no need to scan the whole list for other
entries in the same state - we only change the state of one io_unit at
a time.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/md/raid5-cache.c | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 9f984f8..8684100 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -168,21 +168,6 @@ static void r5l_free_io_unit(struct r5l_log *log, struct r5l_io_unit *io)
 	kmem_cache_free(log->io_kc, io);
 }
 
-static void r5l_move_io_unit_list(struct list_head *from, struct list_head *to,
-       enum r5l_io_unit_state state)
-{
-       struct r5l_io_unit *io;
-
-       while (!list_empty(from)) {
-               io = list_first_entry(from, struct r5l_io_unit, log_sibling);
-               /* don't change list order */
-               if (io->state >= state)
-                       list_move_tail(&io->log_sibling, to);
-               else
-                       break;
-       }
-}
-
 /*
  * We don't want too many io_units reside in stripe_end_ios list, which will
  * waste a lot of memory. So we try to remove some. But we must keep at least 2
@@ -235,8 +220,7 @@ static void r5l_log_endio(struct bio *bio, int error)
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
 	__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
-	r5l_move_io_unit_list(&log->running_ios, &log->io_end_ios,
-			IO_UNIT_IO_END);
+	list_move_tail(&io->log_sibling, &log->io_end_ios);
 	spin_unlock_irqrestore(&log->io_list_lock, flags);
 
 	wake_up(&io->wait_state);
@@ -528,8 +512,7 @@ static void __r5l_stripe_write_finished(struct r5l_io_unit *io)
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
 	__r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
-	r5l_move_io_unit_list(&log->io_end_ios, &log->stripe_end_ios,
-		IO_UNIT_STRIPE_END);
+	list_move_tail(&io->log_sibling, &log->stripe_end_ios);
 	spin_unlock_irqrestore(&log->io_list_lock, flags);
 	wake_up(&io->wait_state);
 
-- 
1.9.1



* [PATCH 3/3] raid5-cache: fix __r5l_stripe_write_finished
From: Christoph Hellwig @ 2015-09-02 14:14 UTC
  To: shli, neilb; +Cc: dan.j.williams, linux-raid, Kernel-team

The stripe end processing has two flaws:

 - it walks the stripe_end_ios list to find the last I/O unit and to
   collapse unneeded ones without taking the proper lock
 - it may remove the currently completed io unit, which causes a use
   after free when calling wake_up on io->wait_state

Fix this by holding io_list_lock over the whole function, and by
rewriting the I/O unit collapsing algorithm to be smarter about
handling the stripe_end_ios list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/md/raid5-cache.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 8684100..054bad4 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -174,27 +174,23 @@ static void r5l_free_io_unit(struct r5l_log *log, struct r5l_io_unit *io)
  * io_units. The superblock must point to a valid meta, if it's the last meta,
  * recovery can scan less
  * */
-static void r5l_compress_stripe_end_list(struct r5l_log *log)
+static void r5l_compress_stripe_end_list(struct r5l_log *log,
+		struct r5l_io_unit *last)
 {
-	struct r5l_io_unit *first, *last, *io;
+	struct r5l_io_unit *first, *cur;
+
+	assert_spin_locked(&log->io_list_lock);
 
 	first = list_first_entry(&log->stripe_end_ios,
 		struct r5l_io_unit, log_sibling);
-	last = list_last_entry(&log->stripe_end_ios,
-		struct r5l_io_unit, log_sibling);
 	if (first == last)
 		return;
-	list_del(&first->log_sibling);
-	list_del(&last->log_sibling);
-	while (!list_empty(&log->stripe_end_ios)) {
-		io = list_first_entry(&log->stripe_end_ios,
-			struct r5l_io_unit, log_sibling);
-		list_del(&io->log_sibling);
-		first->log_end = io->log_end;
-		r5l_free_io_unit(log, io);
+
+	while ((cur = list_next_entry(first, log_sibling)) != last) {
+		list_del(&cur->log_sibling);
+		first->log_end = cur->log_end;
+		r5l_free_io_unit(log, cur);
 	}
-	list_add_tail(&first->log_sibling, &log->stripe_end_ios);
-	list_add_tail(&last->log_sibling, &log->stripe_end_ios);
 }
 
 static void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
@@ -506,24 +502,21 @@ static void r5l_run_no_space_stripes(struct r5l_log *log)
 static void __r5l_stripe_write_finished(struct r5l_io_unit *io)
 {
 	struct r5l_log *log = io->log;
-	struct r5l_io_unit *last;
 	sector_t reclaimable_space;
 	unsigned long flags;
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
 	__r5l_set_io_unit_state(io, IO_UNIT_STRIPE_END);
 	list_move_tail(&io->log_sibling, &log->stripe_end_ios);
-	spin_unlock_irqrestore(&log->io_list_lock, flags);
 	wake_up(&io->wait_state);
 
-	last = list_last_entry(&log->stripe_end_ios,
-			struct r5l_io_unit, log_sibling);
 	reclaimable_space = r5l_ring_distance(log, log->last_checkpoint,
-				last->log_end);
+				io->log_end);
 	if (reclaimable_space >= log->max_free_space)
 		r5l_wake_reclaim(log, 0);
 
-	r5l_compress_stripe_end_list(log);
+	r5l_compress_stripe_end_list(log, io);
+	spin_unlock_irqrestore(&log->io_list_lock, flags);
 }
 
 void r5l_stripe_write_finished(struct stripe_head *sh)
-- 
1.9.1



* Re: fix use after free in raid5-cache
From: Shaohua Li @ 2015-09-02 16:55 UTC
  To: Christoph Hellwig
  Cc: neilb, dan.j.williams, linux-raid, Kernel-team, Song Liu

On Wed, Sep 02, 2015 at 04:14:45PM +0200, Christoph Hellwig wrote:
> Hi Shaohua, hi Neil,
> 
> this series fixes a use after free of the r5l_io_unit structure that I ran
> into while testing the caching code.  The real fix is in patch 3; the other
> two patches contain the refactoring needed to enable it.

Hi Christoph,
Thanks for looking at it. I have some patches held back on my side
which fix the use after free issue too. They change the io_unit list
handling a little bit: specifically, they make r5l_flush_stripe_to_raid
run the flush asynchronously and also fix the io_unit free issue.

For this patch set, the 1st is a good cleanup. I think 2 & 3 have the
same issue of changing the list order. For example, io_unit A is
dispatched to the log earlier than io_unit B, but io_unit B can finish
earlier than io_unit A. If we move io_unit B to io_end_ios first and
there is then a crash, the metadata of io_unit A could be corrupt, so
recovery can't find io_unit B. Please see the comments at
r5l_flush_stripe_to_raid().
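
As a rough stand-alone illustration of that ordering property (plain C with
stand-in names, not the kernel code): the removed r5l_move_io_unit_list()
only drained io_units from the head of the list and stopped at the first one
that had not yet reached the wanted state, so a later io_unit that completes
early can never overtake an earlier one.

#include <stdio.h>

enum state { IO_UNIT_RUNNING = 0, IO_UNIT_IO_END = 1 };

struct unit {
	const char *name;
	enum state state;
};

/* log order: A was written to the log before B */
static struct unit ring[2] = { { "A", IO_UNIT_RUNNING }, { "B", IO_UNIT_RUNNING } };

/*
 * Models the removed r5l_move_io_unit_list(): walk from the head and stop
 * at the first unit that has not reached the wanted state, so nothing is
 * moved out of log order.
 */
static void drain_in_order(enum state wanted)
{
	for (int i = 0; i < 2; i++) {
		if (ring[i].state < wanted)
			break;			/* an earlier unit is still in flight */
		printf("move %s to io_end_ios\n", ring[i].name);
	}
}

int main(void)
{
	ring[1].state = IO_UNIT_IO_END;		/* B finishes first */
	drain_in_order(IO_UNIT_IO_END);		/* moves nothing: A is still running */

	ring[0].state = IO_UNIT_IO_END;		/* now A finishes */
	drain_in_order(IO_UNIT_IO_END);		/* moves A, then B, in log order */
	return 0;
}

A plain list_move_tail() instead moves the early-finishing io_unit past the
still-running one, which is the reordering described above.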

I can rebase the 1st against my patches, what do you think?

Thanks,
Shaohua


* Re: fix use after free in raid5-cache
From: Christoph Hellwig @ 2015-09-02 17:22 UTC
  To: Shaohua Li; +Cc: neilb, dan.j.williams, linux-raid, Kernel-team, Song Liu

Ok, I spotted the break there now.  Feel free to rebase it and please
push your current tree out, as I have a bunch of other changes pending.

