From: "Paul E. McKenney" <paulmck@us.ibm.com>
To: andmike@us.ibm.com, dipankar@us.ibm.com, neilb@cse.unsw.edu.au,
mingo@redhat.com
Cc: linux-kernel@vger.kernel.org
Subject: [RFC, PATCH] RCU questions on md driver
Date: Sat, 4 Dec 2004 16:31:04 -0800 [thread overview]
Message-ID: <20041205003104.GA1914@us.ibm.com> (raw)
Hello!
A few questions and comments on md driver locking, my best guess at
answers may be found in the patch below (which I have not even attempted
to compile, let alone test).
o Need some rcu_dereference() primitives in a number of places.
o The reference counts must be decremented after rcu_read_lock().
Otherwise, unless I am missing something, a preemption (in a
CONFIG_PREEMPT kernel) occuring just before the rcu_read_lock()
looks like it could destroy the element subsequently used.
o How does the locking work in read_balance() in drivers/md/raid1.c?
It looks to me that the conf->mirrors[] array could potentially
be changing during the read_balance() operation, which I cannot
see how is handled correctly.
For that matter, I don't see what prevents multiple instances
of read_balance() from executing concurrently on the same
set of disks...
o Ditto for read_balance in drivers/md/raid10.c. And raid5.c.
Thoughts?
Thanx, Paul
diff -urpN -X ../dontdiff linux-2.5/drivers/md/multipath.c linux-2.5-mpio/drivers/md/multipath.c
--- linux-2.5/drivers/md/multipath.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/multipath.c Sat Dec 4 15:32:19 2004
@@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
rcu_read_lock();
for (i = 0; i < disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && rdev->in_sync) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
@@ -138,7 +138,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -148,8 +148,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -222,7 +222,7 @@ static int multipath_issue_flush(request
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -234,8 +234,8 @@ static int multipath_issue_flush(request
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid1.c linux-2.5-mpio/drivers/md/raid1.c
--- linux-2.5/drivers/md/raid1.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid1.c Sat Dec 4 16:16:48 2004
@@ -338,6 +338,7 @@ static int read_balance(conf_t *conf, r1
int new_disk = conf->last_used, disk = new_disk;
const int sectors = r1_bio->sectors;
sector_t new_distance, current_distance;
+ mdk_rdev_t *rdev;
rcu_read_lock();
/*
@@ -350,8 +351,9 @@ static int read_balance(conf_t *conf, r1
/* Choose the first operation device, for consistancy */
new_disk = 0;
- while (!conf->mirrors[new_disk].rdev ||
- !conf->mirrors[new_disk].rdev->in_sync) {
+ while (!(rdev =
+ rcu_dereference(conf->mirrors[new_disk].rdev)) ||
+ !rdev->in_sync) {
new_disk++;
if (new_disk == conf->raid_disks) {
new_disk = -1;
@@ -363,8 +365,8 @@ static int read_balance(conf_t *conf, r1
/* make sure the disk is operational */
- while (!conf->mirrors[new_disk].rdev ||
- !conf->mirrors[new_disk].rdev->in_sync) {
+ while (!(rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) ||
+ !rdev->in_sync) {
if (new_disk <= 0)
new_disk = conf->raid_disks;
new_disk--;
@@ -393,11 +395,11 @@ static int read_balance(conf_t *conf, r1
disk = conf->raid_disks;
disk--;
- if (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync)
+ if (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+ !rdev->in_sync)
continue;
- if (!atomic_read(&conf->mirrors[disk].rdev->nr_pending)) {
+ if (!atomic_read(&rdev->nr_pending)) {
new_disk = disk;
break;
}
@@ -414,7 +416,7 @@ rb_out:
if (new_disk >= 0) {
conf->next_seq_sect = this_sector + sectors;
conf->last_used = new_disk;
- atomic_inc(&conf->mirrors[new_disk].rdev->nr_pending);
+ atomic_inc(&rdev->nr_pending);
}
rcu_read_unlock();
@@ -428,7 +430,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -438,8 +440,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -459,7 +461,7 @@ static int raid1_issue_flush(request_que
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -471,8 +473,8 @@ static int raid1_issue_flush(request_que
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
@@ -585,9 +587,9 @@ static int make_request(request_queue_t
disks = conf->raid_disks;
rcu_read_lock();
for (i = 0; i < disks; i++) {
- if (conf->mirrors[i].rdev &&
- !conf->mirrors[i].rdev->faulty) {
- atomic_inc(&conf->mirrors[i].rdev->nr_pending);
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && !rdev->faulty) {
+ atomic_inc(&rdev->nr_pending);
r1_bio->bios[i] = bio;
} else
r1_bio->bios[i] = NULL;
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid10.c linux-2.5-mpio/drivers/md/raid10.c
--- linux-2.5/drivers/md/raid10.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid10.c Sat Dec 4 16:23:49 2004
@@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r1
int disk, slot, nslot;
const int sectors = r10_bio->sectors;
sector_t new_distance, current_distance;
+ mdk_rdev_t *rdev;
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
@@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r1
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+ !rdev->in_sync) {
slot++;
if (slot == conf->copies) {
slot = 0;
@@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r1
/* make sure the disk is operational */
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+ !rdev->in_sync) {
slot ++;
if (slot == conf->copies) {
disk = -1;
@@ -546,11 +547,11 @@ static int read_balance(conf_t *conf, r1
int ndisk = r10_bio->devs[nslot].devnum;
- if (!conf->mirrors[ndisk].rdev ||
- !conf->mirrors[ndisk].rdev->in_sync)
+ if (!(rdev = rcu_dereference(conf->mirrors[ndisk].rdev)) ||
+ !rdev->in_sync)
continue;
- if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
+ if (!atomic_read(&rdev->nr_pending)) {
disk = ndisk;
slot = nslot;
break;
@@ -568,8 +569,8 @@ rb_out:
r10_bio->read_slot = slot;
/* conf->next_seq_sect = this_sector + sectors;*/
- if (disk >= 0 && conf->mirrors[disk].rdev)
- atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
+ if (disk >= 0 && rdev)
+ atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
return disk;
@@ -582,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -592,8 +593,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -613,7 +614,7 @@ static int raid10_issue_flush(request_qu
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -625,8 +626,8 @@ static int raid10_issue_flush(request_qu
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
@@ -763,10 +764,11 @@ static int make_request(request_queue_t
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
+ mdk_rdev_t *rdev;
int d = r10_bio->devs[i].devnum;
- if (conf->mirrors[d].rdev &&
- !conf->mirrors[d].rdev->faulty) {
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
+ if (rdev && !rdev->faulty) {
+ atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else
r10_bio->devs[i].bio = NULL;
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid5.c linux-2.5-mpio/drivers/md/raid5.c
--- linux-2.5/drivers/md/raid5.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid5.c Sat Dec 4 16:26:01 2004
@@ -1248,7 +1248,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid5_end_read_request;
rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1305,7 +1305,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -1315,8 +1315,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -1348,7 +1348,7 @@ static int raid5_issue_flush(request_que
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -1360,8 +1360,8 @@ static int raid5_issue_flush(request_que
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid6main.c linux-2.5-mpio/drivers/md/raid6main.c
--- linux-2.5/drivers/md/raid6main.c Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid6main.c Sat Dec 4 16:27:56 2004
@@ -1411,7 +1411,7 @@ static void handle_stripe(struct stripe_
bi->bi_end_io = raid6_end_read_request;
rcu_read_lock();
- rdev = conf->disks[i].rdev;
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && rdev->faulty)
rdev = NULL;
if (rdev)
@@ -1468,7 +1468,7 @@ static void unplug_slaves(mddev_t *mddev
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
@@ -1478,8 +1478,8 @@ static void unplug_slaves(mddev_t *mddev
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
rcu_read_unlock();
@@ -1511,7 +1511,7 @@ static int raid6_issue_flush(request_que
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && !rdev->faulty) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -1523,8 +1523,8 @@ static int raid6_issue_flush(request_que
rcu_read_unlock();
ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
error_sector);
- rdev_dec_pending(rdev, mddev);
rcu_read_lock();
+ rdev_dec_pending(rdev, mddev);
}
}
}
next reply other threads:[~2004-12-05 23:10 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-12-05 0:31 Paul E. McKenney [this message]
2004-12-05 23:57 ` [RFC, PATCH] RCU questions on md driver Neil Brown
2004-12-06 18:30 ` Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20041205003104.GA1914@us.ibm.com \
--to=paulmck@us.ibm.com \
--cc=andmike@us.ibm.com \
--cc=dipankar@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=neilb@cse.unsw.edu.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.