linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org
Subject: [PATCH md 006 of 14] Make /proc/mdstat pollable.
Date: Thu, 1 Dec 2005 14:23:16 +1100	[thread overview]
Message-ID: <1051201032316.29617@suse.de> (raw)
In-Reply-To: 20051201141508.29384.patches@notabene


With this patch it is possible to poll /proc/mdstat to detect
arrays appearing or disappearing, to detect failures,
recovery starting, recovery completing, and devices being
added and removed.

It is similar to the poll-ability of /proc/mounts, though different in that:

We always report that the file is readable (because face it, it is, even
if only for EOF).
We report POLLPRI when there is a change so that select() can detect
it as an exceptional event.  Not only are these exceptional events, but 
that is the mechanism that the current 'mdadm' uses to watch for events
(It also polls after a timeout).
(We also report POLLERR like /proc/mounts).
Finally, we only reset the per-file event counter when the start of the 
file is read, rather than when poll() returns an event.  This is more
robust as it means that an fd will continue to report activity to poll/select
until the program clearly responds to that activity.

md_new_event takes an 'mddev' which isn't currently used, but it will
be soon.

Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./drivers/md/md.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 76 insertions(+), 5 deletions(-)

diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~	2005-12-01 13:56:49.000000000 +1100
+++ ./drivers/md/md.c	2005-12-01 13:57:01.000000000 +1100
@@ -42,6 +42,7 @@
 #include <linux/devfs_fs_kernel.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/suspend.h>
+#include <linux/poll.h>
 
 #include <linux/init.h>
 
@@ -134,6 +135,24 @@ static struct block_device_operations md
 static int start_readonly;
 
 /*
+ * We have a system wide 'event count' that is incremented
+ * on any 'interesting' event, and readers of /proc/mdstat
+ * can use 'poll' or 'select' to find out when the event
+ * count increases.
+ *
+ * Events are:
+ *  start array, stop array, error, add device, remove device,
+ *  start build, activate spare
+ */
+DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
+static atomic_t md_event_count;
+void md_new_event(mddev_t *mddev)
+{
+	atomic_inc(&md_event_count);
+	wake_up(&md_event_waiters);
+}
+
+/*
  * Enables to iterate over all existing md arrays
  * all_mddevs_lock protects this list.
  */
@@ -2111,6 +2130,7 @@ static int do_md_run(mddev_t * mddev)
 	mddev->queue->make_request_fn = mddev->pers->make_request;
 
 	mddev->changed = 1;
+	md_new_event(mddev);
 	return 0;
 }
 
@@ -2238,6 +2258,7 @@ static int do_md_stop(mddev_t * mddev, i
 		printk(KERN_INFO "md: %s switched to read-only mode.\n",
 			mdname(mddev));
 	err = 0;
+	md_new_event(mddev);
 out:
 	return err;
 }
@@ -2712,6 +2733,7 @@ static int hot_remove_disk(mddev_t * mdd
 
 	kick_rdev_from_array(rdev);
 	md_update_sb(mddev);
+	md_new_event(mddev);
 
 	return 0;
 busy:
@@ -2802,7 +2824,7 @@ static int hot_add_disk(mddev_t * mddev,
 	 */
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
-
+	md_new_event(mddev);
 	return 0;
 
 abort_unbind_export:
@@ -3523,6 +3545,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
+	md_new_event(mddev);
 }
 
 /* seq_file implementation /proc/mdstat */
@@ -3663,12 +3686,17 @@ static void md_seq_stop(struct seq_file 
 		mddev_put(mddev);
 }
 
+struct mdstat_info {
+	int event;
+};
+
 static int md_seq_show(struct seq_file *seq, void *v)
 {
 	mddev_t *mddev = v;
 	sector_t size;
 	struct list_head *tmp2;
 	mdk_rdev_t *rdev;
+	struct mdstat_info *mi = seq->private;
 	int i;
 	struct bitmap *bitmap;
 
@@ -3681,6 +3709,7 @@ static int md_seq_show(struct seq_file *
 
 		spin_unlock(&pers_lock);
 		seq_printf(seq, "\n");
+		mi->event = atomic_read(&md_event_count);
 		return 0;
 	}
 	if (v == (void*)2) {
@@ -3789,16 +3818,52 @@ static struct seq_operations md_seq_ops 
 static int md_seq_open(struct inode *inode, struct file *file)
 {
 	int error;
+	struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
+	if (mi == NULL)
+		return -ENOMEM;
 
 	error = seq_open(file, &md_seq_ops);
+	if (error)
+		kfree(mi);
+	else {
+		struct seq_file *p = file->private_data;
+		p->private = mi;
+		mi->event = atomic_read(&md_event_count);
+	}
 	return error;
 }
 
+static int md_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *m = file->private_data;
+	struct mdstat_info *mi = m->private;
+	m->private = NULL;
+	kfree(mi);
+	return seq_release(inode, file);
+}
+
+static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
+{
+	struct seq_file *m = filp->private_data;
+	struct mdstat_info *mi = m->private;
+	int mask;
+
+	poll_wait(filp, &md_event_waiters, wait);
+
+	/* always allow read */
+	mask = POLLIN | POLLRDNORM;
+
+	if (mi->event != atomic_read(&md_event_count))
+		mask |= POLLERR | POLLPRI;
+	return mask;
+}
+
 static struct file_operations md_seq_fops = {
 	.open           = md_seq_open,
 	.read           = seq_read,
 	.llseek         = seq_lseek,
-	.release	= seq_release,
+	.release	= md_seq_release,
+	.poll		= mdstat_poll,
 };
 
 int register_md_personality(int pnum, mdk_personality_t *p)
@@ -4068,7 +4133,11 @@ static void md_do_sync(mddev_t *mddev)
 
 		j += sectors;
 		if (j>1) mddev->curr_resync = j;
-
+		if (last_check == 0)
+			/* this is the earliers that rebuilt will be
+			 * visible in /proc/mdstat
+			 */
+			md_new_event(mddev);
 
 		if (last_check + window > io_sectors || j == max_sectors)
 			continue;
@@ -4254,6 +4323,7 @@ void md_check_recovery(mddev_t *mddev)
 			mddev->recovery = 0;
 			/* flag recovery needed just to double check */
 			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+			md_new_event(mddev);
 			goto unlock;
 		}
 		/* Clear some bits that don't mean anything, but
@@ -4291,6 +4361,7 @@ void md_check_recovery(mddev_t *mddev)
 						sprintf(nm, "rd%d", rdev->raid_disk);
 						sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
 						spares++;
+						md_new_event(mddev);
 					} else
 						break;
 				}
@@ -4323,9 +4394,9 @@ void md_check_recovery(mddev_t *mddev)
 					mdname(mddev));
 				/* leave the spares where they are, it shouldn't hurt */
 				mddev->recovery = 0;
-			} else {
+			} else
 				md_wakeup_thread(mddev->sync_thread);
-			}
+			md_new_event(mddev);
 		}
 	unlock:
 		mddev_unlock(mddev);

  parent reply	other threads:[~2005-12-01  3:23 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-01  3:22 [PATCH md 000 of 14] Introduction NeilBrown
2005-12-01  3:22 ` [PATCH md 001 of 14] Support check-without-repair of raid10 arrays NeilBrown
2005-12-01  3:22 ` [PATCH md 002 of 14] Allow raid1 to check consistency NeilBrown
2005-12-01 22:34   ` Andrew Morton
2005-12-05 23:30     ` Neil Brown
2005-12-06  3:50       ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 003 of 14] Make sure read error on last working drive of raid1 actually returns failure NeilBrown
2005-12-01  3:23 ` [PATCH md 004 of 14] auto-correct correctable read errors in raid10 NeilBrown
2005-12-01  3:23 ` [PATCH md 005 of 14] raid10 read-error handling - resync and read-only NeilBrown
2005-12-01  3:23 ` NeilBrown [this message]
2005-12-01 22:39   ` [PATCH md 006 of 14] Make /proc/mdstat pollable Andrew Morton
2005-12-01  3:23 ` [PATCH md 007 of 14] Clean up 'page' related names in md NeilBrown
2005-12-01  3:23 ` [PATCH md 008 of 14] Convert md to use kzalloc throughout NeilBrown
2005-12-01 22:42   ` Andrew Morton
2005-12-01  3:23 ` [PATCH md 009 of 14] Tidy up raid5/6 hash table code NeilBrown
2005-12-01  3:23 ` [PATCH md 010 of 14] Convert various kmap calls to kmap_atomic NeilBrown
2005-12-01 22:46   ` Andrew Morton
2005-12-05 23:43     ` Neil Brown
2005-12-01  3:23 ` [PATCH md 011 of 14] Convert recently exported symbol to GPL NeilBrown
2005-12-01  3:23 ` [PATCH md 012 of 14] Break out of a loop that doesn't need to run to completion NeilBrown
2005-12-01  3:23 ` [PATCH md 013 of 14] Remove personality numbering from md NeilBrown
2005-12-01  3:24 ` [PATCH md 014 of 14] Fix possible problem in raid1/raid10 error overwriting NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1051201032316.29617@suse.de \
    --to=neilb@suse.de \
    --cc=akpm@osdl.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).