All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch] dm-raid1.c  fix  a race bug in __rh_alloc()
@ 2005-06-17  2:21 Zhao Qian
  2005-06-24 15:33 ` Jonathan E Brassow
  0 siblings, 1 reply; 4+ messages in thread
From: Zhao Qian @ 2005-06-17  2:21 UTC (permalink / raw)
  To: dm-devel

after write_unlock_irq and just before read_lock, there's a small window which enables a race causing deletion of the region struct in function rh_update_states(). then in rh_dec(), the __rh_lookup() will return null, causing kernel panic. 


[root@darkstar md]# diff -u dm-raid1.orig.c dm-raid1.c 
--- dm-raid1.orig.c     2005-06-16 14:17:04.000000000 +0800
+++ dm-raid1.c  2005-06-17 10:02:04.000000000 +0800
@@ -252,15 +252,16 @@
 
        else {
                __rh_insert(rh, nreg);
-               if (nreg->state == RH_CLEAN) {
-                       spin_lock(&rh->region_lock);
-                       list_add(&nreg->list, &rh->clean_regions);
-                       spin_unlock(&rh->region_lock);
-               }
                reg = nreg;
        }
        write_unlock_irq(&rh->hash_lock);
        read_lock(&rh->hash_lock);
+       if (reg->state == RH_CLEAN) {
+               spin_lock(&rh->region_lock);
+               if ( list_empty(&reg->list) )
+                       list_add(&reg->list, &rh->clean_regions);
+               spin_unlock(&rh->region_lock);
+       }
 
        return reg;
 }

Sincerely,
    Johnson <dujun@aaastor.com>
    AiM9 <zhaoqian@aaastor.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] dm-raid1.c  fix  a race bug in __rh_alloc()
  2005-06-17  2:21 [patch] dm-raid1.c fix a race bug in __rh_alloc() Zhao Qian
@ 2005-06-24 15:33 ` Jonathan E Brassow
  2005-06-28 13:46   ` Jun'ichi Nomura
  0 siblings, 1 reply; 4+ messages in thread
From: Jonathan E Brassow @ 2005-06-24 15:33 UTC (permalink / raw)
  To: device-mapper development


[-- Attachment #1.1: Type: text/plain, Size: 1558 bytes --]

I believe this also fixes Jun'ichi's issue ([dm-devel] [PATCH] 
2.6.12-rc6: fix __rh_alloc()/rh_update_states() race in dm-raid1.c)

  brassow

On Jun 16, 2005, at 9:21 PM, Zhao Qian wrote:

> after write_unlock_irq and just before read_lock, there's a small 
> window which enables a race causing deletion of the region struct in 
> function rh_update_states(). then in rh_dec(), the __rh_lookup() will 
> return null, causing kernel panic.
>
>
> [root@darkstar md]# diff -u dm-raid1.orig.c dm-raid1.c
> --- dm-raid1.orig.c     2005-06-16 14:17:04.000000000 +0800
> +++ dm-raid1.c  2005-06-17 10:02:04.000000000 +0800
> @@ -252,15 +252,16 @@
>
>         else {
>                 __rh_insert(rh, nreg);
> -               if (nreg->state == RH_CLEAN) {
> -                       spin_lock(&rh->region_lock);
> -                       list_add(&nreg->list, &rh->clean_regions);
> -                       spin_unlock(&rh->region_lock);
> -               }
>                 reg = nreg;
>         }
>         write_unlock_irq(&rh->hash_lock);
>         read_lock(&rh->hash_lock);
> +       if (reg->state == RH_CLEAN) {
> +               spin_lock(&rh->region_lock);
> +               if ( list_empty(&reg->list) )
> +                       list_add(&reg->list, &rh->clean_regions);
> +               spin_unlock(&rh->region_lock);
> +       }
>
>         return reg;
>  }
>
> Sincerely,
>     Johnson <dujun@aaastor.com>
>     AiM9 <zhaoqian@aaastor.com>
>
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel
>

[-- Attachment #1.2: Type: text/enriched, Size: 1549 bytes --]

I believe this also fixes Jun'ichi's issue (<bold>[dm-devel] [PATCH]
2.6.12-rc6: fix __rh_alloc()/rh_update_states() race in dm-raid1.c)


 brassow

</bold>

On Jun 16, 2005, at 9:21 PM, Zhao Qian wrote:


<excerpt>after write_unlock_irq and just before read_lock, there's a
small window which enables a race causing deletion of the region
struct in function rh_update_states(). then in rh_dec(), the
__rh_lookup() will return null, causing kernel panic. 



[root@darkstar md]# diff -u dm-raid1.orig.c dm-raid1.c 

--- dm-raid1.orig.c     2005-06-16 14:17:04.000000000 +0800

+++ dm-raid1.c  2005-06-17 10:02:04.000000000 +0800

@@ -252,15 +252,16 @@


        else {

                __rh_insert(rh, nreg);

-               if (nreg->state == RH_CLEAN) {

-                       spin_lock(&rh->region_lock);

-                       list_add(&nreg->list, &rh->clean_regions);

-                       spin_unlock(&rh->region_lock);

-               }

                reg = nreg;

        }

        write_unlock_irq(&rh->hash_lock);

        read_lock(&rh->hash_lock);

+       if (reg->state == RH_CLEAN) {

+               spin_lock(&rh->region_lock);

+               if ( list_empty(&reg->list) )

+                       list_add(&reg->list, &rh->clean_regions);

+               spin_unlock(&rh->region_lock);

+       }


        return reg;

 }


Sincerely,

    Johnson <<dujun@aaastor.com>

    AiM9 <<zhaoqian@aaastor.com>


--

dm-devel mailing list

dm-devel@redhat.com

https://www.redhat.com/mailman/listinfo/dm-devel


</excerpt>

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] dm-raid1.c  fix  a race bug in __rh_alloc()
  2005-06-24 15:33 ` Jonathan E Brassow
@ 2005-06-28 13:46   ` Jun'ichi Nomura
  2005-06-29  1:32     ` Du Jun
  0 siblings, 1 reply; 4+ messages in thread
From: Jun'ichi Nomura @ 2005-06-28 13:46 UTC (permalink / raw)
  To: device-mapper development

[-- Attachment #1: Type: text/plain, Size: 1169 bytes --]

Hi,

Jonathan E Brassow wrote:
 > I believe this also fixes Jun'ichi's issue (*[dm-devel] [PATCH]
 > 2.6.12-rc6: fix __rh_alloc()/rh_update_states() race in dm-raid1.c)

Unfortunately, it doesn't eliminate the possibility of the region
being freed during lock conversion (though the possibility becomes
very very low) and also it will cause other problems.

I modified his patch with fixes below
   - spin_lock should be spin_lock_irq to avoid deadlock,
   - needs to check pending count to avoid moving dirty region to clean list,
and added retry code.

This one should work. How about this?

However, I think we need to find smarter fix not to depend on retry.

Thanks,

Jonathan E Brassow wrote:
> I believe this also fixes Jun'ichi's issue (*[dm-devel] [PATCH] 
> 2.6.12-rc6: fix __rh_alloc()/rh_update_states() race in dm-raid1.c)
> 
> brassow
> *
> On Jun 16, 2005, at 9:21 PM, Zhao Qian wrote:
> 
>     after write_unlock_irq and just before read_lock, there's a small
>     window which enables a race causing deletion of the region struct in
>     function rh_update_states(). then in rh_dec(), the __rh_lookup()
>     will return null, causing kernel panic.


[-- Attachment #2: dm.patch --]
[-- Type: text/x-patch, Size: 1054 bytes --]

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -232,6 +232,7 @@ static struct region *__rh_alloc(struct 
 {
 	struct region *reg, *nreg;
 
+retry:
 	read_unlock(&rh->hash_lock);
 	nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
 	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
@@ -252,15 +253,19 @@ static struct region *__rh_alloc(struct 
 
 	else {
 		__rh_insert(rh, nreg);
-		if (nreg->state == RH_CLEAN) {
-			spin_lock(&rh->region_lock);
-			list_add(&nreg->list, &rh->clean_regions);
-			spin_unlock(&rh->region_lock);
-		}
 		reg = nreg;
 	}
 	write_unlock_irq(&rh->hash_lock);
 	read_lock(&rh->hash_lock);
+	/* rare, but it's possible the region is freed by other cpu */
+	if (reg != __rh_lookup(rh, region))
+		goto retry;
+	if (reg->state == RH_CLEAN) {
+		spin_lock_irq(&rh->region_lock);
+		if (!atomic_read(&reg->pending) && list_empty(&reg->list))
+			list_add(&reg->list, &rh->clean_regions);
+		spin_unlock_irq(&rh->region_lock);
+	}
 
 	return reg;
 }

[-- Attachment #3: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] dm-raid1.c  fix  a race bug in __rh_alloc()
  2005-06-28 13:46   ` Jun'ichi Nomura
@ 2005-06-29  1:32     ` Du Jun
  0 siblings, 0 replies; 4+ messages in thread
From: Du Jun @ 2005-06-29  1:32 UTC (permalink / raw)
  To: device-mapper development

the problem lies on that there's a small window between the write_unlock and
read_lock. for a smarter fix mentioned by Jun'ichi, I think we need a
downgradable rw lock to eliminate the race window.


----- Original Message ----- 
From: "Jun'ichi Nomura" <j-nomura@ce.jp.nec.com>
To: "device-mapper development" <dm-devel@redhat.com>
Sent: Tuesday, June 28, 2005 9:46 PM
Subject: Re: [dm-devel] [patch] dm-raid1.c fix a race bug in __rh_alloc()


> Hi,
>
> Jonathan E Brassow wrote:
>  > I believe this also fixes Jun'ichi's issue (*[dm-devel] [PATCH]
>  > 2.6.12-rc6: fix __rh_alloc()/rh_update_states() race in dm-raid1.c)
>
> Unfortunately, it doesn't eliminate the possibility of the region
> being freed during lock conversion (though the possibility becomes
> very very low) and also it will cause other problems.
>
> I modified his patch with fixes below
>    - spin_lock should be spin_lock_irq to avoid deadlock,
>    - needs to check pending count to avoid moving dirty region to clean
list,
> and added retry code.
>
> This one should work. How about this?
>
> However, I think we need to find smarter fix not to depend on retry.
>
> Thanks,
>
> Jonathan E Brassow wrote:
> > I believe this also fixes Jun'ichi's issue (*[dm-devel] [PATCH]
> > 2.6.12-rc6: fix __rh_alloc()/rh_update_states() race in dm-raid1.c)
> >
> > brassow
> > *
> > On Jun 16, 2005, at 9:21 PM, Zhao Qian wrote:
> >
> >     after write_unlock_irq and just before read_lock, there's a small
> >     window which enables a race causing deletion of the region struct in
> >     function rh_update_states(). then in rh_dec(), the __rh_lookup()
> >     will return null, causing kernel panic.
>
>


----------------------------------------------------------------------------
----


> diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
> --- a/drivers/md/dm-raid1.c
> +++ b/drivers/md/dm-raid1.c
> @@ -232,6 +232,7 @@ static struct region *__rh_alloc(struct
>  {
>   struct region *reg, *nreg;
>
> +retry:
>   read_unlock(&rh->hash_lock);
>   nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
>   nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
> @@ -252,15 +253,19 @@ static struct region *__rh_alloc(struct
>
>   else {
>   __rh_insert(rh, nreg);
> - if (nreg->state == RH_CLEAN) {
> - spin_lock(&rh->region_lock);
> - list_add(&nreg->list, &rh->clean_regions);
> - spin_unlock(&rh->region_lock);
> - }
>   reg = nreg;
>   }
>   write_unlock_irq(&rh->hash_lock);
>   read_lock(&rh->hash_lock);
> + /* rare, but it's possible the region is freed by other cpu */
> + if (reg != __rh_lookup(rh, region))
> + goto retry;
> + if (reg->state == RH_CLEAN) {
> + spin_lock_irq(&rh->region_lock);
> + if (!atomic_read(&reg->pending) && list_empty(&reg->list))
> + list_add(&reg->list, &rh->clean_regions);
> + spin_unlock_irq(&rh->region_lock);
> + }
>
>   return reg;
>  }
>


----------------------------------------------------------------------------
----


> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-06-29  1:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-06-17  2:21 [patch] dm-raid1.c fix a race bug in __rh_alloc() Zhao Qian
2005-06-24 15:33 ` Jonathan E Brassow
2005-06-28 13:46   ` Jun'ichi Nomura
2005-06-29  1:32     ` Du Jun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.