linux-bcache.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] bcache: fixup lock c->root error
@ 2023-09-06  6:51 Mingzhe Zou
  2023-09-06 20:23 ` Eric Wheeler
  0 siblings, 1 reply; 3+ messages in thread
From: Mingzhe Zou @ 2023-09-06  6:51 UTC (permalink / raw)
  To: colyli, bcache, linux-bcache; +Cc: zoumingzhe, Mingzhe Zou

We had a problem with I/O hanging because it was waiting for the
c->root lock to be released.

crash> cache_set.root -l cache_set.list ffffa03fde4c0050
  root = 0xffff802ef454c800
crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
  [ffff802ef454c858] struct rw_semaphore lock;
crash> struct rw_semaphore ffff802ef454c858
struct rw_semaphore {
  count = {
    counter = -4294967297
  },
  wait_list = {
    next = 0xffff00006786fc28,
    prev = 0xffff00005d0efac8
  },
  wait_lock = {
    raw_lock = {
      {
        val = {
          counter = 0
        },
        {
          locked = 0 '\000',
          pending = 0 '\000'
        },
        {
          locked_pending = 0,
          tail = 0
        }
      }
    }
  },
  osq = {
    tail = {
      counter = 0
    }
  },
  owner = 0xffffa03fdc586603
}

The "counter = -4294967297" means that lock count is -1 and a write lock
is being attempted. Then, we found that there is a btree with a counter
of 1 in btree_cache_freeable.

crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
  [ffffa03fde4c1140] struct list_head btree_cache;
  [ffffa03fde4c1150] struct list_head btree_cache_freeable;
  [ffffa03fde4c1160] struct list_head btree_cache_freed;
  [ffffa03fde4c1170] unsigned int btree_cache_used;
  [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
  [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
crash> list -H ffffa03fde4c1140|wc -l
973
crash> list -H ffffa03fde4c1150|wc -l
1123
crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
  btree_cache_used = 2097
crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^  lock = {" > btree_cache.txt
crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^  lock = {" > btree_cache_freeable.txt
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
/var/crash/127.0.0.1-2023-08-04-16:40:28
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
[root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
      counter = 1

We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
    (1). Thread X has locked c->root(A) write.
    (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A).
    (3). Thread X bch_btree_set_root() changes c->root from A to B.
    (4). Thread X releases the lock(c->root A).
    (5). Thread Y successfully locks c->root(A).
    (6). Thread Y releases the lock(c->root B).

        down_write locked ---(1)----------------------┐
                |                                     |
                |   down_read waiting ---(2)----┐     |
                |           |               ┌-------------┐ ┌-------------┐
        bch_btree_set_root ===(3)========>> | c->root   A | | c->root   B |
                |           |               └-------------┘ └-------------┘
            up_write ---(4)---------------------┘     |            |
                            |                         |            |
                    down_read locked ---(5)-----------┘            |
                            |                                      |
                        up_read ---(6)-----------------------------┘

Since c->root may change, the correct way to lock c->root is the same
as in bch_root_usage(): take the lock on the node read from c->root,
then compare it against c->root again and retry if it has changed.

static unsigned int bch_root_usage(struct cache_set *c)
{
        unsigned int bytes = 0;
        struct bkey *k;
        struct btree *b;
        struct btree_iter iter;

        goto lock_root;

        do {
                rw_unlock(false, b);
lock_root:
                b = c->root;
                rw_lock(false, b, b->level);
        } while (b != c->root);

        for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
                bytes += bkey_bytes(k);

        rw_unlock(false, b);

        return (bytes * 100) / btree_bytes(c);
}

Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded")
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
---
 drivers/md/bcache/writeback.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 24c049067f61..bac916ba08c8 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void)
 void bch_sectors_dirty_init(struct bcache_device *d)
 {
 	int i;
+	struct btree *b = NULL;
 	struct bkey *k = NULL;
 	struct btree_iter iter;
 	struct sectors_dirty_init op;
 	struct cache_set *c = d->c;
 	struct bch_dirty_init_state state;
 
+retry_lock:
+	b = c->root;
+	rw_lock(0, b, b->level);
+	if (b != c->root) {
+		rw_unlock(0, b);
+		goto retry_lock;
+	}
+
 	/* Just count root keys if no leaf node */
-	rw_lock(0, c->root, c->root->level);
 	if (c->root->level == 0) {
 		bch_btree_op_init(&op.op, -1);
 		op.inode = d->id;
@@ -994,7 +1002,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
 				    k, &iter, bch_ptr_invalid)
 			sectors_dirty_init_fn(&op.op, c->root, k);
 
-		rw_unlock(0, c->root);
+		rw_unlock(0, b);
 		return;
 	}
 
@@ -1030,7 +1038,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
 out:
 	/* Must wait for all threads to stop. */
 	wait_event(state.wait, atomic_read(&state.started) == 0);
-	rw_unlock(0, c->root);
+	rw_unlock(0, b);
 }
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] bcache: fixup lock c->root error
  2023-09-06  6:51 [PATCH] bcache: fixup lock c->root error Mingzhe Zou
@ 2023-09-06 20:23 ` Eric Wheeler
  2023-09-08  3:44   ` 邹明哲
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Wheeler @ 2023-09-06 20:23 UTC (permalink / raw)
  To: Mingzhe Zou; +Cc: colyli, bcache, linux-bcache, zoumingzhe

[-- Attachment #1: Type: text/plain, Size: 5993 bytes --]

On Wed, 6 Sep 2023, Mingzhe Zou wrote:

> We had a problem with io hung because it was waiting for c->root to
> release the lock.
> 
> crash> cache_set.root -l cache_set.list ffffa03fde4c0050
>   root = 0xffff802ef454c800
> crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
>   [ffff802ef454c858] struct rw_semaphore lock;
> crash> struct rw_semaphore ffff802ef454c858
> struct rw_semaphore {
>   count = {
>     counter = -4294967297
>   },
>   wait_list = {
>     next = 0xffff00006786fc28,
>     prev = 0xffff00005d0efac8
>   },
>   wait_lock = {
>     raw_lock = {
>       {
>         val = {
>           counter = 0
>         },
>         {
>           locked = 0 '\000',
>           pending = 0 '\000'
>         },
>         {
>           locked_pending = 0,
>           tail = 0
>         }
>       }
>     }
>   },
>   osq = {
>     tail = {
>       counter = 0
>     }
>   },
>   owner = 0xffffa03fdc586603
> }
> 
> The "counter = -4294967297" means that lock count is -1 and a write lock
> is being attempted. Then, we found that there is a btree with a counter
> of 1 in btree_cache_freeable.
> 
> crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
>   [ffffa03fde4c1140] struct list_head btree_cache;
>   [ffffa03fde4c1150] struct list_head btree_cache_freeable;
>   [ffffa03fde4c1160] struct list_head btree_cache_freed;
>   [ffffa03fde4c1170] unsigned int btree_cache_used;
>   [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
>   [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
> crash> list -H ffffa03fde4c1140|wc -l
> 973
> crash> list -H ffffa03fde4c1150|wc -l
> 1123
> crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
>   btree_cache_used = 2097
> crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^  lock = {" > btree_cache.txt
> crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^  lock = {" > btree_cache_freeable.txt
> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
> /var/crash/127.0.0.1-2023-08-04-16:40:28
> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
>       counter = 1
> 
> We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
>     (1). Thread X has locked c->root(A) write.
>     (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A).
>     (3). Thread X bch_btree_set_root() changes c->root from A to B.
>     (4). Thread X releases the lock(c->root A).
>     (5). Thread Y successfully locks c->root(A).
>     (6). Thread Y releases the lock(c->root B).
> 
>         down_write locked ---(1)----------------------┐
>                 |                                     |
>                 |   down_read waiting ---(2)----┐     |
>                 |           |               ┌-------------┐ ┌-------------┐
>         bch_btree_set_root ===(3)========>> | c->root   A | | c->root   B |
>                 |           |               └-------------┘ └-------------┘
>             up_write ---(4)---------------------┘     |            |
>                             |                         |            |
>                     down_read locked ---(5)-----------┘            |
>                             |                                      |
>                         up_read ---(6)-----------------------------┘
> 
> Since c->root may change, the correct steps to lock c->root should be
> the same as bch_root_usage(), compare after locking.
> 
> static unsigned int bch_root_usage(struct cache_set *c)
> {
>         unsigned int bytes = 0;
>         struct bkey *k;
>         struct btree *b;
>         struct btree_iter iter;
> 
>         goto lock_root;
> 
>         do {
>                 rw_unlock(false, b);
> lock_root:
>                 b = c->root;
>                 rw_lock(false, b, b->level);
>         } while (b != c->root);
> 
>         for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
>                 bytes += bkey_bytes(k);
> 
>         rw_unlock(false, b);
> 
>         return (bytes * 100) / btree_bytes(c);
> }
> 
> Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded")



Good work.  b144e45fc576 is from v5.6. 

Please CC stable@vger.kernel.org

--
Eric Wheeler





> Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
> ---
>  drivers/md/bcache/writeback.c | 14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
> index 24c049067f61..bac916ba08c8 100644
> --- a/drivers/md/bcache/writeback.c
> +++ b/drivers/md/bcache/writeback.c
> @@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void)
>  void bch_sectors_dirty_init(struct bcache_device *d)
>  {
>  	int i;
> +	struct btree *b = NULL;
>  	struct bkey *k = NULL;
>  	struct btree_iter iter;
>  	struct sectors_dirty_init op;
>  	struct cache_set *c = d->c;
>  	struct bch_dirty_init_state state;
>  
> +retry_lock:
> +	b = c->root;
> +	rw_lock(0, b, b->level);
> +	if (b != c->root) {
> +		rw_unlock(0, b);
> +		goto retry_lock;
> +	}
> +
>  	/* Just count root keys if no leaf node */
> -	rw_lock(0, c->root, c->root->level);
>  	if (c->root->level == 0) {
>  		bch_btree_op_init(&op.op, -1);
>  		op.inode = d->id;
> @@ -994,7 +1002,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
>  				    k, &iter, bch_ptr_invalid)
>  			sectors_dirty_init_fn(&op.op, c->root, k);
>  
> -		rw_unlock(0, c->root);
> +		rw_unlock(0, b);
>  		return;
>  	}
>  
> @@ -1030,7 +1038,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
>  out:
>  	/* Must wait for all threads to stop. */
>  	wait_event(state.wait, atomic_read(&state.started) == 0);
> -	rw_unlock(0, c->root);
> +	rw_unlock(0, b);
>  }
>  
>  void bch_cached_dev_writeback_init(struct cached_dev *dc)
> -- 
> 2.25.1
> 
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re:Re: [PATCH] bcache: fixup lock c->root error
  2023-09-06 20:23 ` Eric Wheeler
@ 2023-09-08  3:44   ` 邹明哲
  0 siblings, 0 replies; 3+ messages in thread
From: 邹明哲 @ 2023-09-08  3:44 UTC (permalink / raw)
  To: Eric Wheeler; +Cc: colyli, bcache, linux-bcache, zoumingzhe


From: Eric Wheeler <bcache@lists.ewheeler.net>
Date: 2023-09-07 04:23:50
To:  Mingzhe Zou <mingzhe.zou@easystack.cn>
Cc:  colyli@suse.de,bcache@lists.ewheeler.net,linux-bcache@vger.kernel.org,zoumingzhe@qq.com
Subject: Re: [PATCH] bcache: fixup lock c->root error
>On Wed, 6 Sep 2023, Mingzhe Zou wrote:
>
>> We had a problem with io hung because it was waiting for c->root to
>> release the lock.
>> 
>> crash> cache_set.root -l cache_set.list ffffa03fde4c0050
>>   root = 0xffff802ef454c800
>> crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
>>   [ffff802ef454c858] struct rw_semaphore lock;
>> crash> struct rw_semaphore ffff802ef454c858
>> struct rw_semaphore {
>>   count = {
>>     counter = -4294967297
>>   },
>>   wait_list = {
>>     next = 0xffff00006786fc28,
>>     prev = 0xffff00005d0efac8
>>   },
>>   wait_lock = {
>>     raw_lock = {
>>       {
>>         val = {
>>           counter = 0
>>         },
>>         {
>>           locked = 0 '\000',
>>           pending = 0 '\000'
>>         },
>>         {
>>           locked_pending = 0,
>>           tail = 0
>>         }
>>       }
>>     }
>>   },
>>   osq = {
>>     tail = {
>>       counter = 0
>>     }
>>   },
>>   owner = 0xffffa03fdc586603
>> }
>> 
>> The "counter = -4294967297" means that lock count is -1 and a write lock
>> is being attempted. Then, we found that there is a btree with a counter
>> of 1 in btree_cache_freeable.
>> 
>> crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
>>   [ffffa03fde4c1140] struct list_head btree_cache;
>>   [ffffa03fde4c1150] struct list_head btree_cache_freeable;
>>   [ffffa03fde4c1160] struct list_head btree_cache_freed;
>>   [ffffa03fde4c1170] unsigned int btree_cache_used;
>>   [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
>>   [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
>> crash> list -H ffffa03fde4c1140|wc -l
>> 973
>> crash> list -H ffffa03fde4c1150|wc -l
>> 1123
>> crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
>>   btree_cache_used = 2097
>> crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^  lock = {" > btree_cache.txt
>> crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^  lock = {" > btree_cache_freeable.txt
>> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
>> /var/crash/127.0.0.1-2023-08-04-16:40:28
>> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
>> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
>>       counter = 1
>> 
>> We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
>>     (1). Thread X has locked c->root(A) write.
>>     (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A).
>>     (3). Thread X bch_btree_set_root() changes c->root from A to B.
>>     (4). Thread X releases the lock(c->root A).
>>     (5). Thread Y successfully locks c->root(A).
>>     (6). Thread Y releases the lock(c->root B).
>> 
>>         down_write locked ---(1)----------------------┐
>>                 |                                     |
>>                 |   down_read waiting ---(2)----┐     |
>>                 |           |               ┌-------------┐ ┌-------------┐
>>         bch_btree_set_root ===(3)========>> | c->root   A | | c->root   B |
>>                 |           |               └-------------┘ └-------------┘
>>             up_write ---(4)---------------------┘     |            |
>>                             |                         |            |
>>                     down_read locked ---(5)-----------┘            |
>>                             |                                      |
>>                         up_read ---(6)-----------------------------┘
>> 
>> Since c->root may change, the correct steps to lock c->root should be
>> the same as bch_root_usage(), compare after locking.
>> 
>> static unsigned int bch_root_usage(struct cache_set *c)
>> {
>>         unsigned int bytes = 0;
>>         struct bkey *k;
>>         struct btree *b;
>>         struct btree_iter iter;
>> 
>>         goto lock_root;
>> 
>>         do {
>>                 rw_unlock(false, b);
>> lock_root:
>>                 b = c->root;
>>                 rw_lock(false, b, b->level);
>>         } while (b != c->root);
>> 
>>         for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
>>                 bytes += bkey_bytes(k);
>> 
>>         rw_unlock(false, b);
>> 
>>         return (bytes * 100) / btree_bytes(c);
>> }
>> 
>> Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded")
>
>
>
>Good work.  b144e45fc576 is from v5.6. 
>
>Please CC stable@vger.kernel.org
>
>--
>Eric Wheeler
>
>
Hi, Eric

I sent V2. Thanks

mingzhe
>
>
>
>> Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
>> ---
>>  drivers/md/bcache/writeback.c | 14 +++++++++++---
>>  1 file changed, 11 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
>> index 24c049067f61..bac916ba08c8 100644
>> --- a/drivers/md/bcache/writeback.c
>> +++ b/drivers/md/bcache/writeback.c
>> @@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void)
>>  void bch_sectors_dirty_init(struct bcache_device *d)
>>  {
>>  	int i;
>> +	struct btree *b = NULL;
>>  	struct bkey *k = NULL;
>>  	struct btree_iter iter;
>>  	struct sectors_dirty_init op;
>>  	struct cache_set *c = d->c;
>>  	struct bch_dirty_init_state state;
>>  
>> +retry_lock:
>> +	b = c->root;
>> +	rw_lock(0, b, b->level);
>> +	if (b != c->root) {
>> +		rw_unlock(0, b);
>> +		goto retry_lock;
>> +	}
>> +
>>  	/* Just count root keys if no leaf node */
>> -	rw_lock(0, c->root, c->root->level);
>>  	if (c->root->level == 0) {
>>  		bch_btree_op_init(&op.op, -1);
>>  		op.inode = d->id;
>> @@ -994,7 +1002,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
>>  				    k, &iter, bch_ptr_invalid)
>>  			sectors_dirty_init_fn(&op.op, c->root, k);
>>  
>> -		rw_unlock(0, c->root);
>> +		rw_unlock(0, b);
>>  		return;
>>  	}
>>  
>> @@ -1030,7 +1038,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
>>  out:
>>  	/* Must wait for all threads to stop. */
>>  	wait_event(state.wait, atomic_read(&state.started) == 0);
>> -	rw_unlock(0, c->root);
>> +	rw_unlock(0, b);
>>  }
>>  
>>  void bch_cached_dev_writeback_init(struct cached_dev *dc)
>> -- 
>> 2.25.1
>> 
>> 



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-09-08  3:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-06  6:51 [PATCH] bcache: fixup lock c->root error Mingzhe Zou
2023-09-06 20:23 ` Eric Wheeler
2023-09-08  3:44   ` 邹明哲

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).