All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Paul E. McKenney" <paulmck@us.ibm.com>
To: andmike@us.ibm.com, dipankar@us.ibm.com, neilb@cse.unsw.edu.au,
	mingo@redhat.com
Cc: linux-kernel@vger.kernel.org
Subject: [RFC, PATCH] RCU questions on md driver
Date: Sat, 4 Dec 2004 16:31:04 -0800	[thread overview]
Message-ID: <20041205003104.GA1914@us.ibm.com> (raw)

Hello!

A few questions and comments on md driver locking, my best guess at
answers may be found in the patch below (which I have not even attempted
to compile, let alone test).

o	Need some rcu_dereference() primitives in a number of places.

o	The reference counts must be decremented after rcu_read_lock().
	Otherwise, unless I am missing something, a preemption (in a
	CONFIG_PREEMPT kernel) occuring just before the rcu_read_lock()
	looks like it could destroy the element subsequently used.

o	How does the locking work in read_balance() in drivers/md/raid1.c?
	It looks to me that the conf->mirrors[] array could potentially
	be changing during the read_balance() operation, which I cannot
	see how is handled correctly.

	For that matter, I don't see what prevents multiple instances
	of read_balance() from executing concurrently on the same
	set of disks...

o	Ditto for read_balance in drivers/md/raid10.c.  And raid5.c.

Thoughts?

						Thanx, Paul


diff -urpN -X ../dontdiff linux-2.5/drivers/md/multipath.c linux-2.5-mpio/drivers/md/multipath.c
--- linux-2.5/drivers/md/multipath.c	Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/multipath.c	Sat Dec  4 15:32:19 2004
@@ -63,7 +63,7 @@ static int multipath_map (multipath_conf
 
 	rcu_read_lock();
 	for (i = 0; i < disks; i++) {
-		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
 		if (rdev && rdev->in_sync) {
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
@@ -138,7 +138,7 @@ static void unplug_slaves(mddev_t *mddev
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
-		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
@@ -148,8 +148,8 @@ static void unplug_slaves(mddev_t *mddev
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			rdev_dec_pending(rdev, mddev);
 			rcu_read_lock();
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	rcu_read_unlock();
@@ -222,7 +222,7 @@ static int multipath_issue_flush(request
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -234,8 +234,8 @@ static int multipath_issue_flush(request
 				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
-				rdev_dec_pending(rdev, mddev);
 				rcu_read_lock();
+				rdev_dec_pending(rdev, mddev);
 			}
 		}
 	}
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid1.c linux-2.5-mpio/drivers/md/raid1.c
--- linux-2.5/drivers/md/raid1.c	Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid1.c	Sat Dec  4 16:16:48 2004
@@ -338,6 +338,7 @@ static int read_balance(conf_t *conf, r1
 	int new_disk = conf->last_used, disk = new_disk;
 	const int sectors = r1_bio->sectors;
 	sector_t new_distance, current_distance;
+	mdk_rdev_t *rdev;
 
 	rcu_read_lock();
 	/*
@@ -350,8 +351,9 @@ static int read_balance(conf_t *conf, r1
 		/* Choose the first operation device, for consistancy */
 		new_disk = 0;
 
-		while (!conf->mirrors[new_disk].rdev ||
-		       !conf->mirrors[new_disk].rdev->in_sync) {
+		while (!(rdev =
+			 rcu_dereference(conf->mirrors[new_disk].rdev)) ||
+		       !rdev->in_sync) {
 			new_disk++;
 			if (new_disk == conf->raid_disks) {
 				new_disk = -1;
@@ -363,8 +365,8 @@ static int read_balance(conf_t *conf, r1
 
 
 	/* make sure the disk is operational */
-	while (!conf->mirrors[new_disk].rdev ||
-	       !conf->mirrors[new_disk].rdev->in_sync) {
+	while (!(rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) ||
+	       !rdev->in_sync) {
 		if (new_disk <= 0)
 			new_disk = conf->raid_disks;
 		new_disk--;
@@ -393,11 +395,11 @@ static int read_balance(conf_t *conf, r1
 			disk = conf->raid_disks;
 		disk--;
 
-		if (!conf->mirrors[disk].rdev ||
-		    !conf->mirrors[disk].rdev->in_sync)
+		if (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+		    !rdev->in_sync)
 			continue;
 
-		if (!atomic_read(&conf->mirrors[disk].rdev->nr_pending)) {
+		if (!atomic_read(&rdev->nr_pending)) {
 			new_disk = disk;
 			break;
 		}
@@ -414,7 +416,7 @@ rb_out:
 	if (new_disk >= 0) {
 		conf->next_seq_sect = this_sector + sectors;
 		conf->last_used = new_disk;
-		atomic_inc(&conf->mirrors[new_disk].rdev->nr_pending);
+		atomic_inc(&rdev->nr_pending);
 	}
 	rcu_read_unlock();
 
@@ -428,7 +430,7 @@ static void unplug_slaves(mddev_t *mddev
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
-		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
@@ -438,8 +440,8 @@ static void unplug_slaves(mddev_t *mddev
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			rdev_dec_pending(rdev, mddev);
 			rcu_read_lock();
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	rcu_read_unlock();
@@ -459,7 +461,7 @@ static int raid1_issue_flush(request_que
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -471,8 +473,8 @@ static int raid1_issue_flush(request_que
 				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
-				rdev_dec_pending(rdev, mddev);
 				rcu_read_lock();
+				rdev_dec_pending(rdev, mddev);
 			}
 		}
 	}
@@ -585,9 +587,9 @@ static int make_request(request_queue_t 
 	disks = conf->raid_disks;
 	rcu_read_lock();
 	for (i = 0;  i < disks; i++) {
-		if (conf->mirrors[i].rdev &&
-		    !conf->mirrors[i].rdev->faulty) {
-			atomic_inc(&conf->mirrors[i].rdev->nr_pending);
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		if (rdev && !rdev->faulty) {
+			atomic_inc(&rdev->nr_pending);
 			r1_bio->bios[i] = bio;
 		} else
 			r1_bio->bios[i] = NULL;
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid10.c linux-2.5-mpio/drivers/md/raid10.c
--- linux-2.5/drivers/md/raid10.c	Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid10.c	Sat Dec  4 16:23:49 2004
@@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r1
 	int disk, slot, nslot;
 	const int sectors = r10_bio->sectors;
 	sector_t new_distance, current_distance;
+	mdk_rdev_t *rdev;
 
 	raid10_find_phys(conf, r10_bio);
 	rcu_read_lock();
@@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r1
 		slot = 0;
 		disk = r10_bio->devs[slot].devnum;
 
-		while (!conf->mirrors[disk].rdev ||
-		       !conf->mirrors[disk].rdev->in_sync) {
+		while (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+		       !rdev->in_sync) {
 			slot++;
 			if (slot == conf->copies) {
 				slot = 0;
@@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r1
 	/* make sure the disk is operational */
 	slot = 0;
 	disk = r10_bio->devs[slot].devnum;
-	while (!conf->mirrors[disk].rdev ||
-	       !conf->mirrors[disk].rdev->in_sync) {
+	while (!(rdev = rcu_dereference(conf->mirrors[disk].rdev)) ||
+	       !rdev->in_sync) {
 		slot ++;
 		if (slot == conf->copies) {
 			disk = -1;
@@ -546,11 +547,11 @@ static int read_balance(conf_t *conf, r1
 		int ndisk = r10_bio->devs[nslot].devnum;
 
 
-		if (!conf->mirrors[ndisk].rdev ||
-		    !conf->mirrors[ndisk].rdev->in_sync)
+		if (!(rdev = rcu_dereference(conf->mirrors[ndisk].rdev)) ||
+		    !rdev->in_sync)
 			continue;
 
-		if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
+		if (!atomic_read(&rdev->nr_pending)) {
 			disk = ndisk;
 			slot = nslot;
 			break;
@@ -568,8 +569,8 @@ rb_out:
 	r10_bio->read_slot = slot;
 /*	conf->next_seq_sect = this_sector + sectors;*/
 
-	if (disk >= 0 && conf->mirrors[disk].rdev)
-		atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
+	if (disk >= 0 && rdev)
+		atomic_inc(&rdev->nr_pending);
 	rcu_read_unlock();
 
 	return disk;
@@ -582,7 +583,7 @@ static void unplug_slaves(mddev_t *mddev
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
-		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
@@ -592,8 +593,8 @@ static void unplug_slaves(mddev_t *mddev
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			rdev_dec_pending(rdev, mddev);
 			rcu_read_lock();
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	rcu_read_unlock();
@@ -613,7 +614,7 @@ static int raid10_issue_flush(request_qu
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -625,8 +626,8 @@ static int raid10_issue_flush(request_qu
 				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
-				rdev_dec_pending(rdev, mddev);
 				rcu_read_lock();
+				rdev_dec_pending(rdev, mddev);
 			}
 		}
 	}
@@ -763,10 +764,11 @@ static int make_request(request_queue_t 
 	raid10_find_phys(conf, r10_bio);
 	rcu_read_lock();
 	for (i = 0;  i < conf->copies; i++) {
+		mdk_rdev_t *rdev;
 		int d = r10_bio->devs[i].devnum;
-		if (conf->mirrors[d].rdev &&
-		    !conf->mirrors[d].rdev->faulty) {
-			atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+		rdev = rcu_dereference(conf->mirrors[d].rdev);
+		if (rdev && !rdev->faulty) {
+			atomic_inc(&rdev->nr_pending);
 			r10_bio->devs[i].bio = bio;
 		} else
 			r10_bio->devs[i].bio = NULL;
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid5.c linux-2.5-mpio/drivers/md/raid5.c
--- linux-2.5/drivers/md/raid5.c	Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid5.c	Sat Dec  4 16:26:01 2004
@@ -1248,7 +1248,7 @@ static void handle_stripe(struct stripe_
 			bi->bi_end_io = raid5_end_read_request;
  
 		rcu_read_lock();
-		rdev = conf->disks[i].rdev;
+		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && rdev->faulty)
 			rdev = NULL;
 		if (rdev)
@@ -1305,7 +1305,7 @@ static void unplug_slaves(mddev_t *mddev
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
-		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
@@ -1315,8 +1315,8 @@ static void unplug_slaves(mddev_t *mddev
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			rdev_dec_pending(rdev, mddev);
 			rcu_read_lock();
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	rcu_read_unlock();
@@ -1348,7 +1348,7 @@ static int raid5_issue_flush(request_que
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -1360,8 +1360,8 @@ static int raid5_issue_flush(request_que
 				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
-				rdev_dec_pending(rdev, mddev);
 				rcu_read_lock();
+				rdev_dec_pending(rdev, mddev);
 			}
 		}
 	}
diff -urpN -X ../dontdiff linux-2.5/drivers/md/raid6main.c linux-2.5-mpio/drivers/md/raid6main.c
--- linux-2.5/drivers/md/raid6main.c	Mon Nov 29 10:48:55 2004
+++ linux-2.5-mpio/drivers/md/raid6main.c	Sat Dec  4 16:27:56 2004
@@ -1411,7 +1411,7 @@ static void handle_stripe(struct stripe_
 			bi->bi_end_io = raid6_end_read_request;
 
 		rcu_read_lock();
-		rdev = conf->disks[i].rdev;
+		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && rdev->faulty)
 			rdev = NULL;
 		if (rdev)
@@ -1468,7 +1468,7 @@ static void unplug_slaves(mddev_t *mddev
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks; i++) {
-		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
 			request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
@@ -1478,8 +1478,8 @@ static void unplug_slaves(mddev_t *mddev
 			if (r_queue->unplug_fn)
 				r_queue->unplug_fn(r_queue);
 
-			rdev_dec_pending(rdev, mddev);
 			rcu_read_lock();
+			rdev_dec_pending(rdev, mddev);
 		}
 	}
 	rcu_read_unlock();
@@ -1511,7 +1511,7 @@ static int raid6_issue_flush(request_que
 
 	rcu_read_lock();
 	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = conf->disks[i].rdev;
+		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev && !rdev->faulty) {
 			struct block_device *bdev = rdev->bdev;
 			request_queue_t *r_queue = bdev_get_queue(bdev);
@@ -1523,8 +1523,8 @@ static int raid6_issue_flush(request_que
 				rcu_read_unlock();
 				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 							      error_sector);
-				rdev_dec_pending(rdev, mddev);
 				rcu_read_lock();
+				rdev_dec_pending(rdev, mddev);
 			}
 		}
 	}

             reply	other threads:[~2004-12-05 23:10 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-12-05  0:31 Paul E. McKenney [this message]
2004-12-05 23:57 ` [RFC, PATCH] RCU questions on md driver Neil Brown
2004-12-06 18:30   ` Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20041205003104.GA1914@us.ibm.com \
    --to=paulmck@us.ibm.com \
    --cc=andmike@us.ibm.com \
    --cc=dipankar@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=neilb@cse.unsw.edu.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.