[PATCH 2/2] raid5: Using conf->device_lock protect multi-thread resouce when changed.

All of lore.kernel.org
 help / color / mirror / Atom feed

From: majianpeng <majianpeng@gmail.com>
To: NeilBrown <neilb@suse.de>, shli <shli@kernel.org>
Cc: linux-raid <linux-raid@vger.kernel.org>
Subject: [PATCH 2/2] raid5: Using conf->device_lock protect multi-thread resouce when changed.
Date: Tue, 12 Nov 2013 10:43:39 +0800	[thread overview]
Message-ID: <201311121043363569796@gmail.com> (raw)

When changed group_thread_cnt from sysfs entry, it met OOPS.
The kernel messages are:
[  135.299021] BUG: unable to handle kernel NULL pointer dereference at           (null)
[  135.299073] IP: [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
[  135.299107] PGD 0
[  135.299122] Oops: 0000 [#1] SMP
[  135.299144] Modules linked in: netconsole e1000e ptp pps_core
[  135.299188] CPU: 3 PID: 2225 Comm: md0_raid5 Not tainted 3.12.0+ #24
[  135.299214] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080015  11/09/2011
[  135.299255] task: ffff8800b9638f80 ti: ffff8800b77a4000 task.ti: ffff8800b77a4000
[  135.299283] RIP: 0010:[<ffffffff815188ab>]  [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
[  135.299323] RSP: 0018:ffff8800b77a5c48  EFLAGS: 00010002
[  135.299344] RAX: ffff880037bb5c70 RBX: 0000000000000000 RCX: 0000000000000008
[  135.299371] RDX: ffff880037bb5cb8 RSI: 0000000000000001 RDI: ffff880037bb5c00
[  135.299398] RBP: ffff8800b77a5d08 R08: 0000000000000001 R09: 0000000000000000
[  135.299425] R10: ffff8800b77a5c98 R11: 00000000ffffffff R12: ffff880037bb5c00
[  135.299452] R13: 0000000000000000 R14: 0000000000000000 R15: ffff880037bb5c70
[  135.299479] FS:  0000000000000000(0000) GS:ffff88013fd80000(0000) knlGS:0000000000000000
[  135.299510] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  135.299532] CR2: 0000000000000000 CR3: 0000000001c0b000 CR4: 00000000000407e0
[  135.299559] Stack:
[  135.299570]  ffff8800b77a5c88 ffffffff8107383e ffff8800b77a5c88 ffff880037a64300
[  135.299611]  000000000000ec08 ffff880037bb5cb8 ffff8800b77a5c98 ffffffffffffffd8
[  135.299654]  000000000000ec08 ffff880037bb5c60 ffff8800b77a5c98 ffff8800b77a5c98
[  135.299696] Call Trace:
[  135.299711]  [<ffffffff8107383e>] ? __wake_up+0x4e/0x70
[  135.299733]  [<ffffffff81518f88>] raid5d+0x4c8/0x680
[  135.299756]  [<ffffffff817174ed>] ? schedule_timeout+0x15d/0x1f0
[  135.299781]  [<ffffffff81524c9f>] md_thread+0x11f/0x170
[  135.299804]  [<ffffffff81069cd0>] ? wake_up_bit+0x40/0x40
[  135.299826]  [<ffffffff81524b80>] ? md_rdev_init+0x110/0x110
[  135.299850]  [<ffffffff81069656>] kthread+0xc6/0xd0
[  135.299871]  [<ffffffff81069590>] ? kthread_freezable_should_stop+0x70/0x70
[  135.299899]  [<ffffffff81722ffc>] ret_from_fork+0x7c/0xb0
[  135.299923]  [<ffffffff81069590>] ? kthread_freezable_should_stop+0x70/0x70
[  135.299951] Code: ff ff ff 0f 84 d7 fe ff ff e9 5c fe ff ff 66 90 41 8b b4 24 d8 01 00 00 45 31 ed 85 f6 0f 8e 7b fd ff ff 49 8b 9c 24 d0 01 00 00 <48> 3b 1b 49 89 dd 0f 85 67 fd ff ff 48 8d 43 28 31 d2 eb 17 90
[  135.300005] RIP  [<ffffffff815188ab>] handle_active_stripes+0x32b/0x440
[  135.300005]  RSP <ffff8800b77a5c48>
[  135.300005] CR2: 0000000000000000
[  135.300005] ---[ end trace 504854e5bb7562ed ]---
[  135.300005] Kernel panic - not syncing: Fatal exception

This because raid5d() can running when changed multi-thread resources.
After mddve_suspend(), the raid5d() can still on running.
But when change multi-thread resources in raid5_store_group_thread_cnt(),
it can't use conf->device_lock to protect.

Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
---
 drivers/md/raid5.c |   60 +++++++++++++++++++++++++++++++--------------------
 1 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cc866f8..c50c180 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5213,15 +5213,17 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
 		return 0;
 }
 
-static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
+				int *worker_cnt_per_group,
+				struct r5worker_group **worker_groups);
 static ssize_t
 raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 {
 	struct r5conf *conf = mddev->private;
 	unsigned long new;
 	int err;
-	struct r5worker_group *old_groups;
-	int old_group_cnt;
+	struct r5worker_group *new_groups, *old_groups;
+	int group_cnt, worker_cnt_per_group;
 
 	if (len >= PAGE_SIZE)
 		return -EINVAL;
@@ -5237,17 +5239,18 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 	mddev_suspend(mddev);
 
 	old_groups = conf->worker_groups;
-	old_group_cnt = conf->worker_cnt_per_group;
-
 	if (old_groups)
 		flush_workqueue(raid5_wq);
 
-	conf->worker_groups = NULL;
-	err = alloc_thread_groups(conf, new);
-	if (err) {
-		conf->worker_groups = old_groups;
-		conf->worker_cnt_per_group = old_group_cnt;
-	} else {
+	err = alloc_thread_groups(conf, new, &group_cnt, &worker_cnt_per_group,
+					&new_groups);
+	if (!err) {
+		spin_lock_irq(&conf->device_lock);
+		conf->group_cnt = group_cnt;
+		conf->worker_cnt_per_group = worker_cnt_per_group;
+		conf->worker_groups = new_groups;
+		spin_unlock_irq(&conf->device_lock);
+
 		if (old_groups)
 			kfree(old_groups[0].workers);
 		kfree(old_groups);
@@ -5277,33 +5280,35 @@ static struct attribute_group raid5_attrs_group = {
 	.attrs = raid5_attrs,
 };
 
-static int alloc_thread_groups(struct r5conf *conf, int cnt)
+static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
+				int *worker_cnt_per_group,
+				struct r5worker_group **worker_groups)
 {
 	int i, j;
 	ssize_t size;
 	struct r5worker *workers;
 
-	conf->worker_cnt_per_group = cnt;
+	*worker_cnt_per_group = cnt;
 	if (cnt == 0) {
-		conf->worker_groups = NULL;
+		*group_cnt = 0;
+		*worker_groups = NULL;
 		return 0;
 	}
-	conf->group_cnt = num_possible_nodes();
+	*group_cnt = num_possible_nodes();
 	size = sizeof(struct r5worker) * cnt;
-	workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
-	conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
-				conf->group_cnt, GFP_NOIO);
-	if (!conf->worker_groups || !workers) {
+	workers = kzalloc(size * *group_cnt, GFP_NOIO);
+	*worker_groups = kzalloc(sizeof(struct r5worker_group) *
+				*group_cnt, GFP_NOIO);
+	if (!*worker_groups || !workers) {
 		kfree(workers);
-		kfree(conf->worker_groups);
-		conf->worker_groups = NULL;
+		kfree(*worker_groups);
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < conf->group_cnt; i++) {
+	for (i = 0; i < *group_cnt; i++) {
 		struct r5worker_group *group;
 
-		group = &conf->worker_groups[i];
+		group = worker_groups[i];
 		INIT_LIST_HEAD(&group->handle_list);
 		group->conf = conf;
 		group->workers = workers + i * cnt;
@@ -5461,6 +5466,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	struct md_rdev *rdev;
 	struct disk_info *disk;
 	char pers_name[6];
+	int group_cnt, worker_cnt_per_group;
+	struct r5worker_group *new_group;
 
 	if (mddev->new_level != 5
 	    && mddev->new_level != 4
@@ -5495,7 +5502,12 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	if (conf == NULL)
 		goto abort;
 	/* Don't enable multi-threading by default*/
-	if (alloc_thread_groups(conf, 0))
+	if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
+				&new_group)) {
+		conf->group_cnt = group_cnt;
+		conf->worker_cnt_per_group = worker_cnt_per_group;
+		conf->worker_groups = new_group;
+	} else
 		goto abort;
 	spin_lock_init(&conf->device_lock);
 	seqcount_init(&conf->gen_lock);
-- 
1.7.1

next             reply	other threads:[~2013-11-12  2:43 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-12  2:43 majianpeng [this message]
2013-11-12  3:41 ` [PATCH 2/2] raid5: Using conf->device_lock protect multi-thread resouce when changed Shaohua Li

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:cc866f8 dfblob:c50c180 )
 OR (
bs:"[PATCH 2/2] raid5: Using conf->device_lock protect multi-thread resouce when changed." )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201311121043363569796@gmail.com \
    --to=majianpeng@gmail.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=shli@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.