From: "Paul E. McKenney" <paulmck@us.ibm.com>
To: andmike@us.ibm.com, dipankar@us.ibm.com, neilb@cse.unsw.edu.au,
mingo@redhat.com
Cc: linux-kernel@vger.kernel.org
Subject: [RFC, PATCH] RCU questions on md driver
Date: Sat, 4 Dec 2004 16:31:04 -0800 [thread overview]
Message-ID: <20041205003104.GA1914@us.ibm.com> (raw)
Hello!
A few questions and comments on md driver locking, my best guess at
answers may be found in the patch below (which I have not even attempted
to compile, let alone test).
o Need some rcu_dereference() primitives in a number of places.
o The reference counts must be decremented after rcu_read_lock().
Otherwise, unless I am missing something, a preemption (in a
CONFIG_PREEMPT kernel) occuring just before the rcu_read_lock()
looks like it could destroy the element subsequently used.
o How does the locking work in read_balance() in drivers/md/raid1.c?
It looks to me that the conf->mirrors[] array could potentially
be changing during the read_balance() operation, which I cannot
see how is handled correctly.
For that matter, I don't see what prevents multiple instances
of read_balance() from executing concurrently on the same
set of disks...
o Ditto for read_balance in drivers/md/raid10.c. And raid5.c.
Thoughts?
Thanx, Paul
diff -urpN -X ../dontdiff linux-2.5/drivers/md/multipath.c linux-2.5-mpio/drivers/md/multipath.c
--- linux-2.5/drivers/md/multipath.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/multipath.c Sat Dec 4 15:32:19 2004
@@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
rcu_read_lock();
for (i = 0; i < disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && rdev->in_sync) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
@@ -138,7 +138,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -148,8 +148,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -222,7 +222,7 @@ static int multipath_issue_flush(request
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -234,8 +234,8 @@ static int multipath_issue_flush(request
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid1.c linux-2.5-mpio/drivers/md/raid1.c
--- linux-2.5/drivers/md/raid1.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid1.c Sat Dec 4 16:16:48 2004
@@ -338,6 +338,7 @@ static int read_balance(conf_t *conf, r1
int new_disk = conf->last_used, disk = new_disk;
const int sectors = r1_bio->sectors;
sector_t new_distance, current_distance;
+ mdk_rdev_t *rdev;
rcu_read_lock();
/*
@@ -350,8 +351,9 @@ static int read_balance(conf_t *conf, r1
/* Choose the first operation device, for consistancy */
new_disk = 0;
- while (!conf->mirrors[new_disk].rdev ||
- !conf->mirrors[new_disk].rdev->in_sync) {
+ while (!(rdev =
+ rcu_dereference(conf->mirrors[new_disk].rdev)) ||
+ !rdev->in_sync) {
new_disk++;
if (new_disk == conf->raid_disks) {
new_disk = -1;
@@ -363,8 +365,8 @@ static int read_balance(conf_t *conf, r1
/* make sure the disk is operational */
- while (!conf->mirrors[new_disk].rdev ||
- !conf->mirrors[new_disk].rdev->in_sync) {
+ while (!(rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) ||
+ !rdev->in_sync) {
if (new_disk <= 0)
new_disk = conf->raid_disks;
new_disk--;
@@ -393,11 +395,11 @@ static int read_balance(conf_t *conf, r1
disk = conf->raid_disks;
disk--;
- if (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync)
+ if (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+ !rdev->in_sync)
continue;
- if (!atomic_read(&conf->mirrors[disk].rdev->nr_pending)) {
+ if (!atomic_read(&rdev->nr_pending)) {
new_disk = disk;
break;
}
@@ -414,7 +416,7 @@ rb_out:
if (new_disk >= 0) {
conf->next_seq_sect = this_sector + sectors;
conf->last_used = new_disk;
- atomic_inc(&conf->mirrors[new_disk].rdev->nr_pending);
+ atomic_inc(&rdev->nr_pending);
}
rcu_read_unlock();
@@ -428,7 +430,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -438,8 +440,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -459,7 +461,7 @@ static int raid1_issue_flush(request_que
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -471,8 +473,8 @@ static int raid1_issue_flush(request_que
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
@@ -585,9 +587,9 @@ static int make_request(request_queue_t
disks = conf->raid_disks;
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if (conf->mirrors[i].rdev &&
- !conf->mirrors[i].rdev->faulty) {
- atomic_inc(&conf->mirrors[i].rdev->nr_pending);
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && !rdev->faulty) {
+ atomic_inc(&rdev->nr_pending);
r1_bio->bios[i] = bio;
} else
r1_bio->bios[i] = NULL;
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid10.c linux-2.5-mpio/drivers/md/raid10.c
--- linux-2.5/drivers/md/raid10.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid10.c Sat Dec 4 16:23:49 2004
@@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r1
int disk, slot, nslot;
const int sectors = r10_bio->sectors;
sector_t new_distance, current_distance;
+ mdk_rdev_t *rdev;
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
@@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r1
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+ !rdev->in_sync) {
slot++;
if (slot == conf->copies) {
slot = 0;
@@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r1
/* make sure the disk is operational */
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+ !rdev->in_sync) {
slot ++;
if (slot == conf->copies) {
disk = -1;
@@ -546,11 +547,11 @@ static int read_balance(conf_t *conf, r1
int ndisk = r10_bio->devs[nslot].devnum;
- if (!conf->mirrors[ndisk].rdev ||
- !conf->mirrors[ndisk].rdev->in_sync)
+ if (!(rdev = rcu_dereference(conf->mirrors[ndisk].rdev)) ||
+ !rdev->in_sync)
continue;
- if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
+ if (!atomic_read(&rdev->nr_pending)) {
disk = ndisk;
slot = nslot;
break;
@@ -568,8 +569,8 @@ rb_out:
r10_bio->read_slot = slot;
/* conf->next_seq_sect = this_sector + sectors;*/
- if (disk >= 0 && conf->mirrors[disk].rdev)
- atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
+ if (disk >= 0 && rdev)
+ atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
return disk;
@@ -582,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -592,8 +593,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -613,7 +614,7 @@ static int raid10_issue_flush(request_qu
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -625,8 +626,8 @@ static int raid10_issue_flush(request_qu
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
@@ -763,10 +764,11 @@ static int make_request(request_queue_t
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
+ mdk_rdev_t *rdev;
int d = r10_bio->devs[i].devnum;
- if (conf->mirrors[d].rdev &&
- !conf->mirrors[d].rdev->faulty) {
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
+ if (rdev && !rdev->faulty) {
+ atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else
r10_bio->devs[i].bio = NULL;
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid5.c linux-2.5-mpio/drivers/md/raid5.c
--- linux-2.5/drivers/md/raid5.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid5.c Sat Dec 4 16:26:01 2004
@@ -1248,7 +1248,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid5_end_read_request;
rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1305,7 +1305,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -1315,8 +1315,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -1348,7 +1348,7 @@ static int raid5_issue_flush(request_que
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -1360,8 +1360,8 @@ static int raid5_issue_flush(request_que
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid6main.c linux-2.5-mpio/drivers/md/raid6main.c
--- linux-2.5/drivers/md/raid6main.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid6main.c Sat Dec 4 16:27:56 2004
@@ -1411,7 +1411,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid6_end_read_request;
rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1468,7 +1468,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -1478,8 +1478,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -1511,7 +1511,7 @@ static int raid6_issue_flush(request_que
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -1523,8 +1523,8 @@ static int raid6_issue_flush(request_que
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
next reply other threads:[~2004-12-05 23:10 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-12-05 0:31 Paul E. McKenney [this message]
2004-12-05 23:57 ` [RFC, PATCH] RCU questions on md driver Neil Brown
2004-12-06 18:30 ` Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20041205003104.GA1914@us.ibm.com \
--to=paulmck@us.ibm.com \
--cc=andmike@us.ibm.com \
--cc=dipankar@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=neilb@cse.unsw.edu.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox