* [PATCH md 001 of 5] Initial sysfs support for md
2005-10-04 5:23 [PATCH md 000 of 5] Introduction NeilBrown
@ 2005-10-04 5:23 ` NeilBrown
2005-10-04 5:23 ` [PATCH md 002 of 5] Extend md sysfs support to component devices NeilBrown
` (3 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2005-10-04 5:23 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid
Start using kobjects in mddevs, and provide a couple
of simple attributes (level and disks).
Attributes live in
/sys/block/mdX/md/attr-name
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/md.c | 86 +++++++++++++++++++++++++++++++++++++++++++-
./include/linux/raid/md_k.h | 2 +
2 files changed, 87 insertions(+), 1 deletion(-)
diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~ 2005-10-04 11:54:43.000000000 +1000
+++ ./drivers/md/md.c 2005-10-04 12:16:53.000000000 +1000
@@ -181,7 +181,7 @@ static void mddev_put(mddev_t *mddev)
if (!mddev->raid_disks && list_empty(&mddev->disks)) {
list_del(&mddev->all_mddevs);
blk_put_queue(mddev->queue);
- kfree(mddev);
+ kobject_unregister(&mddev->kobj);
}
spin_unlock(&all_mddevs_lock);
}
@@ -1551,6 +1551,85 @@ static void analyze_sbs(mddev_t * mddev)
}
+struct md_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(mddev_t *, char *);
+ ssize_t (*store)(mddev_t *, const char *, size_t);
+};
+
+static ssize_t
+md_show_level(mddev_t *mddev, char *page)
+{
+ mdk_personality_t *p = mddev->pers;
+ if (p == NULL)
+ return 0;
+ if (mddev->level >= 0)
+ return sprintf(page, "RAID-%d\n", mddev->level);
+ else
+ return sprintf(page, "%s\n", p->name);
+}
+
+static struct md_sysfs_entry md_level = {
+ .attr = {.name = "level", .mode = S_IRUGO },
+ .show = md_show_level,
+};
+
+static ssize_t
+md_show_rdisks(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->raid_disks);
+}
+
+static struct md_sysfs_entry md_raid_disks = {
+ .attr = {.name = "raid_disks", .mode = S_IRUGO },
+ .show = md_show_rdisks,
+};
+
+static struct attribute *md_default_attrs[] = {
+ &md_level.attr,
+ &md_raid_disks.attr,
+ NULL,
+};
+
+static ssize_t
+md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
+ mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+
+ if (!entry->show)
+ return -EIO;
+ return entry->show(mddev, page);
+}
+
+static ssize_t
+md_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
+ mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+
+ if (!entry->store)
+ return -EIO;
+ return entry->store(mddev, page, length);
+}
+
+static void md_free(struct kobject *ko)
+{
+ mddev_t *mddev = container_of(ko, mddev_t, kobj);
+ kfree(mddev);
+}
+
+static struct sysfs_ops md_sysfs_ops = {
+ .show = md_attr_show,
+ .store = md_attr_store,
+};
+static struct kobj_type md_ktype = {
+ .release = md_free,
+ .sysfs_ops = &md_sysfs_ops,
+ .default_attrs = md_default_attrs,
+};
+
int mdp_major = 0;
static struct kobject *md_probe(dev_t dev, int *part, void *data)
@@ -1592,6 +1671,11 @@ static struct kobject *md_probe(dev_t de
add_disk(disk);
mddev->gendisk = disk;
up(&disks_sem);
+ mddev->kobj.parent = kobject_get(&disk->kobj);
+ mddev->kobj.k_name = NULL;
+ snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md");
+ mddev->kobj.ktype = &md_ktype;
+ kobject_register(&mddev->kobj);
return NULL;
}
diff ./include/linux/raid/md_k.h~current~ ./include/linux/raid/md_k.h
--- ./include/linux/raid/md_k.h~current~ 2005-10-04 11:54:43.000000000 +1000
+++ ./include/linux/raid/md_k.h 2005-10-04 11:54:43.000000000 +1000
@@ -148,6 +148,8 @@ struct mddev_s
struct gendisk *gendisk;
+ struct kobject kobj;
+
/* Superblock information */
int major_version,
minor_version,
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH md 002 of 5] Extend md sysfs support to component devices.
2005-10-04 5:23 [PATCH md 000 of 5] Introduction NeilBrown
2005-10-04 5:23 ` [PATCH md 001 of 5] Initial sysfs support for md NeilBrown
@ 2005-10-04 5:23 ` NeilBrown
2005-10-11 23:51 ` Andrew Morton
2005-10-04 5:23 ` [PATCH md 003 of 5] Add kobject/sysfs support to raid5 NeilBrown
` (2 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: NeilBrown @ 2005-10-04 5:23 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid
Each device in an md array now has a corresponding
/sys/block/mdX/md/devNN/
directory which can contain attributes. Currently
there is only 'state' which summarises the state, and
'super' which has a copy of the superblock, and
'block' which is a symlink to the block device.
Also, /sys/block/mdX/md/rdNN represents slot 'NN' in
the array, and is a symlink to the relevant 'devNN'.
Obviously spare devices do not have a slot in the array,
and so don't have such a symlink.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/md.c | 168 +++++++++++++++++++++++++++++++++++++++++---
./include/linux/raid/md_k.h | 2
2 files changed, 162 insertions(+), 8 deletions(-)
diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~ 2005-10-04 12:16:53.000000000 +1000
+++ ./drivers/md/md.c 2005-10-04 12:25:36.000000000 +1000
@@ -711,6 +711,7 @@ static void super_90_sync(mddev_t *mddev
*/
int i;
int active=0, working=0,failed=0,spare=0,nr_disks=0;
+ unsigned int fixdesc=0;
rdev->sb_size = MD_SB_BYTES;
@@ -758,16 +759,28 @@ static void super_90_sync(mddev_t *mddev
sb->disks[0].state = (1<<MD_DISK_REMOVED);
ITERATE_RDEV(mddev,rdev2,tmp) {
mdp_disk_t *d;
+ int desc_nr;
if (rdev2->raid_disk >= 0 && rdev2->in_sync && !rdev2->faulty)
- rdev2->desc_nr = rdev2->raid_disk;
+ desc_nr = rdev2->raid_disk;
else
- rdev2->desc_nr = next_spare++;
+ desc_nr = next_spare++;
+ if (desc_nr != rdev2->desc_nr) {
+ fixdesc |= (1 << desc_nr);
+ rdev2->desc_nr = desc_nr;
+ if (rdev2->raid_disk >= 0) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev2->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+ }
+ sysfs_remove_link(&rdev2->kobj, "block");
+ kobject_del(&rdev2->kobj);
+ }
d = &sb->disks[rdev2->desc_nr];
nr_disks++;
d->number = rdev2->desc_nr;
d->major = MAJOR(rdev2->bdev->bd_dev);
d->minor = MINOR(rdev2->bdev->bd_dev);
- if (rdev2->raid_disk >= 0 && rdev->in_sync && !rdev2->faulty)
+ if (rdev2->raid_disk >= 0 && rdev2->in_sync && !rdev2->faulty)
d->raid_disk = rdev2->raid_disk;
else
d->raid_disk = rdev2->desc_nr; /* compatibility */
@@ -787,7 +800,22 @@ static void super_90_sync(mddev_t *mddev
if (test_bit(WriteMostly, &rdev2->flags))
d->state |= (1<<MD_DISK_WRITEMOSTLY);
}
-
+ if (fixdesc)
+ ITERATE_RDEV(mddev,rdev2,tmp)
+ if (fixdesc & (1<<rdev2->desc_nr)) {
+ snprintf(rdev2->kobj.name, KOBJ_NAME_LEN, "dev%d",
+ rdev2->desc_nr);
+ kobject_add(&rdev2->kobj);
+ sysfs_create_link(&rdev2->kobj,
+ &rdev2->bdev->bd_disk->kobj,
+ "block");
+ if (rdev2->raid_disk >= 0) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev2->raid_disk);
+ sysfs_create_link(&mddev->kobj,
+ &rdev2->kobj, nm);
+ }
+ }
/* now set the "removed" and "faulty" bits on any missing devices */
for (i=0 ; i < mddev->raid_disks ; i++) {
mdp_disk_t *d = &sb->disks[i];
@@ -1147,6 +1175,13 @@ static int bind_rdev_to_array(mdk_rdev_t
list_add(&rdev->same_set, &mddev->disks);
rdev->mddev = mddev;
printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
+
+ rdev->kobj.k_name = NULL;
+ snprintf(rdev->kobj.name, KOBJ_NAME_LEN, "dev%d", rdev->desc_nr);
+ rdev->kobj.parent = kobject_get(&mddev->kobj);
+ kobject_add(&rdev->kobj);
+
+ sysfs_create_link(&rdev->kobj, &rdev->bdev->bd_disk->kobj, "block");
return 0;
}
@@ -1160,6 +1195,8 @@ static void unbind_rdev_from_array(mdk_r
list_del_init(&rdev->same_set);
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
+ sysfs_remove_link(&rdev->kobj, "block");
+ kobject_del(&rdev->kobj);
}
/*
@@ -1215,7 +1252,7 @@ static void export_rdev(mdk_rdev_t * rde
md_autodetect_dev(rdev->bdev->bd_dev);
#endif
unlock_rdev(rdev);
- kfree(rdev);
+ kobject_put(&rdev->kobj);
}
static void kick_rdev_from_array(mdk_rdev_t * rdev)
@@ -1414,6 +1451,94 @@ repeat:
}
+struct rdev_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(mdk_rdev_t *, char *);
+ ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
+};
+
+static ssize_t
+rdev_show_state(mdk_rdev_t *rdev, char *page)
+{
+ char *sep = "";
+ int len=0;
+
+ if (rdev->faulty) {
+ len+= sprintf(page+len, "%sfaulty",sep);
+ sep = ",";
+ }
+ if (rdev->in_sync) {
+ len += sprintf(page+len, "%sin_sync",sep);
+ sep = ",";
+ }
+ if (!rdev->faulty && !rdev->in_sync) {
+ len += sprintf(page+len, "%sspare", sep);
+ sep = ",";
+ }
+ return len+sprintf(page+len, "\n");
+}
+
+static struct rdev_sysfs_entry rdev_state = {
+ .attr = {.name = "state", .mode = S_IRUGO },
+ .show = rdev_show_state,
+};
+
+static ssize_t
+rdev_show_super(mdk_rdev_t *rdev, char *page)
+{
+ if (rdev->sb_loaded && rdev->sb_size) {
+ memcpy(page, page_address(rdev->sb_page), rdev->sb_size);
+ return rdev->sb_size;
+ } else
+ return 0;
+}
+static struct rdev_sysfs_entry rdev_super = {
+ .attr = {.name = "super", .mode = S_IRUGO },
+ .show = rdev_show_super,
+};
+static struct attribute *rdev_default_attrs[] = {
+ &rdev_state.attr,
+ &rdev_super.attr,
+ NULL,
+};
+static ssize_t
+rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
+ mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+
+ if (!entry->show)
+ return -EIO;
+ return entry->show(rdev, page);
+}
+
+static ssize_t
+rdev_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
+ mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+
+ if (!entry->store)
+ return -EIO;
+ return entry->store(rdev, page, length);
+}
+
+static void rdev_free(struct kobject *ko)
+{
+ mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
+ kfree(rdev);
+}
+static struct sysfs_ops rdev_sysfs_ops = {
+ .show = rdev_attr_show,
+ .store = rdev_attr_store,
+};
+static struct kobj_type rdev_ktype = {
+ .release = rdev_free,
+ .sysfs_ops = &rdev_sysfs_ops,
+ .default_attrs = rdev_default_attrs,
+};
+
/*
* Import a device. If 'super_format' >= 0, then sanity check the superblock
*
@@ -1445,6 +1570,10 @@ static mdk_rdev_t *md_import_device(dev_
if (err)
goto abort_free;
+ rdev->kobj.parent = NULL;
+ rdev->kobj.ktype = &rdev_ktype;
+ kobject_init(&rdev->kobj);
+
rdev->desc_nr = -1;
rdev->faulty = 0;
rdev->in_sync = 0;
@@ -1820,6 +1949,13 @@ static int do_md_run(mddev_t * mddev)
mddev->safemode_timer.data = (unsigned long) mddev;
mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */
mddev->in_sync = 1;
+
+ ITERATE_RDEV(mddev,rdev,tmp)
+ if (rdev->raid_disk >= 0) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+ }
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
@@ -1941,9 +2077,18 @@ static int do_md_stop(mddev_t * mddev, i
* Free resources if final stop
*/
if (!ro) {
+ mdk_rdev_t *rdev;
+ struct list_head *tmp;
struct gendisk *disk;
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
+ ITERATE_RDEV(mddev,rdev,tmp)
+ if (rdev->raid_disk >= 0) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+ }
+
export_array(mddev);
mddev->array_size = 0;
@@ -3958,17 +4103,24 @@ void md_check_recovery(mddev_t *mddev)
if (rdev->raid_disk >= 0 &&
(rdev->faulty || ! rdev->in_sync) &&
atomic_read(&rdev->nr_pending)==0) {
- if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0)
+ if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) {
+ char nm[20];
+ sprintf(nm,"rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
rdev->raid_disk = -1;
+ }
}
if (mddev->degraded) {
ITERATE_RDEV(mddev,rdev,rtmp)
if (rdev->raid_disk < 0
&& !rdev->faulty) {
- if (mddev->pers->hot_add_disk(mddev,rdev))
+ if (mddev->pers->hot_add_disk(mddev,rdev)) {
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
spares++;
- else
+ } else
break;
}
}
diff ./include/linux/raid/md_k.h~current~ ./include/linux/raid/md_k.h
--- ./include/linux/raid/md_k.h~current~ 2005-10-04 11:54:43.000000000 +1000
+++ ./include/linux/raid/md_k.h 2005-10-04 12:19:17.000000000 +1000
@@ -105,6 +105,8 @@ struct mdk_rdev_s
int sb_size; /* bytes in the superblock */
int preferred_minor; /* autorun support */
+ struct kobject kobj;
+
/* A device can be in one of three states based on two flags:
* Not working: faulty==1 in_sync==0
* Fully working: faulty==0 in_sync==1
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH md 003 of 5] Add kobject/sysfs support to raid5
2005-10-04 5:23 [PATCH md 000 of 5] Introduction NeilBrown
2005-10-04 5:23 ` [PATCH md 001 of 5] Initial sysfs support for md NeilBrown
2005-10-04 5:23 ` [PATCH md 002 of 5] Extend md sysfs support to component devices NeilBrown
@ 2005-10-04 5:23 ` NeilBrown
2005-10-11 23:54 ` Andrew Morton
2005-10-04 5:23 ` [PATCH md 004 of 5] Allow a manual resync with md NeilBrown
2005-10-04 5:23 ` [PATCH md 005 of 5] Teach raid5 the difference between 'check' and 'repair' NeilBrown
4 siblings, 1 reply; 12+ messages in thread
From: NeilBrown @ 2005-10-04 5:23 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid
/sys/block/mdX/md/raid5/
contains raid5-related attributes.
Currently
stripe_cache_size
is number of entries in stripe cache, and is settable.
stripe_cache_active
is number of active entries, and is only readable.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid5.c | 183 +++++++++++++++++++++++++++++++++++--------
./include/linux/raid/raid5.h | 1
2 files changed, 152 insertions(+), 32 deletions(-)
diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2005-10-04 13:08:41.000000000 +1000
+++ ./drivers/md/raid5.c 2005-10-04 13:08:44.000000000 +1000
@@ -293,9 +293,31 @@ static struct stripe_head *get_active_st
return sh;
}
-static int grow_stripes(raid5_conf_t *conf, int num)
+static int grow_one_stripe(raid5_conf_t *conf)
{
struct stripe_head *sh;
+ sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
+ if (!sh)
+ return 0;
+ memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
+ sh->raid_conf = conf;
+ spin_lock_init(&sh->lock);
+
+ if (grow_buffers(sh, conf->raid_disks)) {
+ shrink_buffers(sh, conf->raid_disks);
+ kmem_cache_free(conf->slab_cache, sh);
+ return 0;
+ }
+ /* we just created an active stripe so... */
+ atomic_set(&sh->count, 1);
+ atomic_inc(&conf->active_stripes);
+ INIT_LIST_HEAD(&sh->lru);
+ release_stripe(sh);
+ return 1;
+}
+
+static int grow_stripes(raid5_conf_t *conf, int num)
+{
kmem_cache_t *sc;
int devs = conf->raid_disks;
@@ -308,43 +330,34 @@ static int grow_stripes(raid5_conf_t *co
return 1;
conf->slab_cache = sc;
while (num--) {
- sh = kmem_cache_alloc(sc, GFP_KERNEL);
- if (!sh)
+ if (!grow_one_stripe(conf))
return 1;
- memset(sh, 0, sizeof(*sh) + (devs-1)*sizeof(struct r5dev));
- sh->raid_conf = conf;
- spin_lock_init(&sh->lock);
-
- if (grow_buffers(sh, conf->raid_disks)) {
- shrink_buffers(sh, conf->raid_disks);
- kmem_cache_free(sc, sh);
- return 1;
- }
- /* we just created an active stripe so... */
- atomic_set(&sh->count, 1);
- atomic_inc(&conf->active_stripes);
- INIT_LIST_HEAD(&sh->lru);
- release_stripe(sh);
}
return 0;
}
-static void shrink_stripes(raid5_conf_t *conf)
+static int drop_one_stripe(raid5_conf_t *conf)
{
struct stripe_head *sh;
- while (1) {
- spin_lock_irq(&conf->device_lock);
- sh = get_free_stripe(conf);
- spin_unlock_irq(&conf->device_lock);
- if (!sh)
- break;
- if (atomic_read(&sh->count))
- BUG();
- shrink_buffers(sh, conf->raid_disks);
- kmem_cache_free(conf->slab_cache, sh);
- atomic_dec(&conf->active_stripes);
- }
+ spin_lock_irq(&conf->device_lock);
+ sh = get_free_stripe(conf);
+ spin_unlock_irq(&conf->device_lock);
+ if (!sh)
+ return 0;
+ if (atomic_read(&sh->count))
+ BUG();
+ shrink_buffers(sh, conf->raid_disks);
+ kmem_cache_free(conf->slab_cache, sh);
+ atomic_dec(&conf->active_stripes);
+ return 1;
+}
+
+static void shrink_stripes(raid5_conf_t *conf)
+{
+ while (drop_one_stripe(conf))
+ ;
+
kmem_cache_destroy(conf->slab_cache);
conf->slab_cache = NULL;
}
@@ -1718,6 +1731,108 @@ static void raid5d (mddev_t *mddev)
PRINTK("--- raid5d inactive\n");
}
+struct raid5_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(raid5_conf_t *, char *);
+ ssize_t (*store)(raid5_conf_t *, const char *, ssize_t);
+};
+
+static ssize_t
+raid5_show_stripe_cache_size(raid5_conf_t *conf, char *page)
+{
+ return sprintf(page, "%d\n", conf->max_nr_stripes);
+}
+
+static ssize_t
+raid5_store_stripe_cache_size(raid5_conf_t *conf, const char *page, ssize_t len)
+{
+ char *end;
+ int new;
+ if (len >= PAGE_SIZE)
+ return -EINVAL;
+
+ new = simple_strtoul(page, &end, 10);
+ if (!*page || (*end && *end != '\n') )
+ return -EINVAL;
+ if (new <= 16 || new > 32768)
+ return -EINVAL;
+ while (new < conf->max_nr_stripes) {
+ if (drop_one_stripe(conf))
+ conf->max_nr_stripes--;
+ else
+ break;
+ }
+ while (new > conf->max_nr_stripes) {
+ if (grow_one_stripe(conf))
+ conf->max_nr_stripes++;
+ else break;
+ }
+ return len;
+}
+static struct raid5_sysfs_entry raid5_stripecache_size = {
+ .attr = {.name = "stripe_cache_size", .mode = S_IRUGO | S_IWUSR },
+ .show = raid5_show_stripe_cache_size,
+ .store = raid5_store_stripe_cache_size,
+};
+
+static ssize_t
+raid5_show_stripe_cache_active(raid5_conf_t *conf, char *page)
+{
+ return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
+}
+
+static struct raid5_sysfs_entry raid5_stripecache_active = {
+ .attr = {.name = "stripe_cache_active", .mode = S_IRUGO},
+ .show = raid5_show_stripe_cache_active,
+};
+
+static struct attribute *raid5_default_attrs[] = {
+ &raid5_stripecache_size.attr,
+ &raid5_stripecache_active.attr,
+ NULL,
+};
+
+static ssize_t
+raid5_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct raid5_sysfs_entry *entry = container_of(attr, struct raid5_sysfs_entry, attr);
+ raid5_conf_t *conf = container_of(kobj, raid5_conf_t, kobj);
+
+ if (!entry->show)
+ return -EIO;
+ return entry->show(conf, page);
+}
+
+static ssize_t
+raid5_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct raid5_sysfs_entry *entry = container_of(attr, struct raid5_sysfs_entry, attr);
+ raid5_conf_t *conf = container_of(kobj, raid5_conf_t, kobj);
+
+ if (!entry->store)
+ return -EIO;
+ return entry->store(conf, page, length);
+}
+
+static void raid5_free(struct kobject *ko)
+{
+ raid5_conf_t *conf = container_of(ko, raid5_conf_t, kobj);
+ kfree(conf);
+}
+
+
+static struct sysfs_ops raid5_sysfs_ops = {
+ .show = raid5_attr_show,
+ .store = raid5_attr_store,
+};
+
+static struct kobj_type raid5_ktype = {
+ .release = raid5_free,
+ .sysfs_ops = &raid5_sysfs_ops,
+ .default_attrs = raid5_default_attrs,
+};
+
static int run(mddev_t *mddev)
{
raid5_conf_t *conf;
@@ -1859,6 +1974,10 @@ memory = conf->max_nr_stripes * (sizeof(
}
/* Ok, everything is just fine now */
+ conf->kobj.parent = kobject_get(&mddev->kobj);
+ strcpy(conf->kobj.name, "raid5");
+ conf->kobj.ktype = &raid5_ktype;
+ kobject_register(&conf->kobj);
if (mddev->bitmap)
mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
@@ -1883,7 +2002,7 @@ abort:
-static int stop (mddev_t *mddev)
+static int stop(mddev_t *mddev)
{
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
@@ -1892,7 +2011,7 @@ static int stop (mddev_t *mddev)
shrink_stripes(conf);
free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
- kfree(conf);
+ kobject_unregister(&conf->kobj);
mddev->private = NULL;
return 0;
}
diff ./include/linux/raid/raid5.h~current~ ./include/linux/raid/raid5.h
--- ./include/linux/raid/raid5.h~current~ 2005-10-04 13:08:41.000000000 +1000
+++ ./include/linux/raid/raid5.h 2005-10-04 12:57:00.000000000 +1000
@@ -228,6 +228,7 @@ struct raid5_private_data {
* Cleared when a sync completes.
*/
+ struct kobject kobj;
/*
* Free stripes pool
*/
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH md 004 of 5] Allow a manual resync with md
2005-10-04 5:23 [PATCH md 000 of 5] Introduction NeilBrown
` (2 preceding siblings ...)
2005-10-04 5:23 ` [PATCH md 003 of 5] Add kobject/sysfs support to raid5 NeilBrown
@ 2005-10-04 5:23 ` NeilBrown
2005-10-11 23:56 ` Andrew Morton
2005-10-11 23:57 ` Andrew Morton
2005-10-04 5:23 ` [PATCH md 005 of 5] Teach raid5 the difference between 'check' and 'repair' NeilBrown
4 siblings, 2 replies; 12+ messages in thread
From: NeilBrown @ 2005-10-04 5:23 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid
You can trigger a 'check' with
echo check > /sys/block/mdX/md/scan_mode
or a check-and-repair of errors with
echo repair > /sys/block/mdX/md/scan_mode
and read the current state from the same file.
Note: personalities need to know the difference between 'check' and 'repair',
but don't yet. Until they do, 'check' will be the same as 'repair' and
will just do a normal resync pass.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/md.c | 77 ++++++++++++++++++++++++++++++++++++++------
./include/linux/raid/md_k.h | 4 ++
2 files changed, 72 insertions(+), 9 deletions(-)
diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~ 2005-10-04 12:25:36.000000000 +1000
+++ ./drivers/md/md.c 2005-10-04 14:19:35.000000000 +1000
@@ -1714,9 +1714,60 @@ static struct md_sysfs_entry md_raid_dis
.show = md_show_rdisks,
};
+static ssize_t
+md_show_scan(mddev_t *mddev, char *page)
+{
+ char *type = "none";
+ if (mddev->recovery &
+ ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED))) {
+ if (mddev->recovery & (1<<MD_RECOVERY_SYNC)) {
+ if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ type = "resync";
+ else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+ type = "check";
+ else
+ type = "repair";
+ } else
+ type = "recover";
+ }
+ return sprintf(page, "%s\n", type);
+}
+
+static ssize_t
+md_store_scan(mddev_t *mddev, const char *page, size_t len)
+{
+ int canscan=0;
+ if (mddev->recovery &
+ ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED)))
+ return -EBUSY;
+ down(&mddev->reconfig_sem);
+ if (mddev->pers && mddev->pers->sync_request)
+ canscan=1;
+ up(&mddev->reconfig_sem);
+ if (!canscan)
+ return -EINVAL;
+
+ if (strcmp(page, "check")==0 || strcmp(page, "check\n")==0)
+ set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ else if (strcmp(page, "repair")!=0 && strcmp(page, "repair\n")!=0)
+ return -EINVAL;
+ set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+ set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ return len;
+}
+
+static struct md_sysfs_entry md_scan_mode = {
+ .attr = {.name = "scan_mode", .mode = S_IRUGO|S_IWUSR },
+ .show = md_show_scan,
+ .store = md_store_scan,
+};
+
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_raid_disks.attr,
+ &md_scan_mode.attr,
NULL,
};
@@ -3851,7 +3902,8 @@ static void md_do_sync(mddev_t *mddev)
is_mddev_idle(mddev); /* this also initializes IO event counters */
/* we don't use the checkpoint if there's a bitmap */
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap)
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap
+ && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
j = mddev->recovery_cp;
else
j = 0;
@@ -4089,9 +4141,13 @@ void md_check_recovery(mddev_t *mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
}
- if (mddev->recovery)
- /* probably just the RECOVERY_NEEDED flag */
- mddev->recovery = 0;
+ /* Clear some bits that don't mean anything, but
+ * might be left set
+ */
+ clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
/* no recovery is running.
* remove any failed drives, then
@@ -4125,14 +4181,17 @@ void md_check_recovery(mddev_t *mddev)
}
}
- if (!spares && (mddev->recovery_cp == MaxSector )) {
- /* nothing we can do ... */
+ if (spares) {
+ clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ } else if (mddev->recovery_cp < MaxSector) {
+ set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+ /* nothing to be done ... */
goto unlock;
- }
+
if (mddev->pers->sync_request) {
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- if (!spares)
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
if (spares && mddev->bitmap && ! mddev->bitmap->file) {
/* We are adding a device or devices to an array
* which has the bitmap stored on all devices.
diff ./include/linux/raid/md_k.h~current~ ./include/linux/raid/md_k.h
--- ./include/linux/raid/md_k.h~current~ 2005-10-04 12:19:17.000000000 +1000
+++ ./include/linux/raid/md_k.h 2005-10-04 14:15:54.000000000 +1000
@@ -182,6 +182,8 @@ struct mddev_s
* ERR: and IO error was detected - abort the resync/recovery
* INTR: someone requested a (clean) early abort.
* DONE: thread is done and is waiting to be reaped
+ * REQUESTED: user-space has requested a sync (used with SYNC)
+ * CHECK: user-space request for check-only, no repair
*/
#define MD_RECOVERY_RUNNING 0
#define MD_RECOVERY_SYNC 1
@@ -189,6 +191,8 @@ struct mddev_s
#define MD_RECOVERY_INTR 3
#define MD_RECOVERY_DONE 4
#define MD_RECOVERY_NEEDED 5
+#define MD_RECOVERY_REQUESTED 6
+#define MD_RECOVERY_CHECK 7
unsigned long recovery;
int in_sync; /* know to not need resync */
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH md 004 of 5] Allow a manual resync with md
2005-10-04 5:23 ` [PATCH md 004 of 5] Allow a manual resync with md NeilBrown
@ 2005-10-11 23:56 ` Andrew Morton
2005-10-13 5:18 ` Neil Brown
2005-10-11 23:57 ` Andrew Morton
1 sibling, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2005-10-11 23:56 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-raid
NeilBrown <neilb@suse.de> wrote:
>
> static ssize_t
> +md_show_scan(mddev_t *mddev, char *page)
> +{
> + char *type = "none";
> + if (mddev->recovery &
> + ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED))) {
Shouldn't this be a bitwise OR?
> + if (mddev->recovery & (1<<MD_RECOVERY_SYNC)) {
> + if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
> + type = "resync";
> + else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
> + type = "check";
> + else
> + type = "repair";
> + } else
> + type = "recover";
> + }
> + return sprintf(page, "%s\n", type);
> +}
> +
> +static ssize_t
> +md_store_scan(mddev_t *mddev, const char *page, size_t len)
> +{
> + int canscan=0;
> + if (mddev->recovery &
> + ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED)))
And this?
> + return -EBUSY;
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH md 004 of 5] Allow a manual resync with md
2005-10-11 23:56 ` Andrew Morton
@ 2005-10-13 5:18 ` Neil Brown
0 siblings, 0 replies; 12+ messages in thread
From: Neil Brown @ 2005-10-13 5:18 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid
On Tuesday October 11, akpm@osdl.org wrote:
> NeilBrown <neilb@suse.de> wrote:
> >
> > static ssize_t
> > +md_show_scan(mddev_t *mddev, char *page)
> > +{
> > + char *type = "none";
> > + if (mddev->recovery &
> > + ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED))) {
>
> Shouldn't this be a bitwise OR?
Yes, though given that my testing showed it worked fine, there is
little practical difference!!
The constant value becomes '1' instead of '33', and the '32' bit is
only set for extremely short periods of time, so not testing it doesn't
make a visible difference!
I will, of course, fix it.
Thanks for reviewing the patches.
NeilBrown
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH md 004 of 5] Allow a manual resync with md
2005-10-04 5:23 ` [PATCH md 004 of 5] Allow a manual resync with md NeilBrown
2005-10-11 23:56 ` Andrew Morton
@ 2005-10-11 23:57 ` Andrew Morton
1 sibling, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2005-10-11 23:57 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-raid
NeilBrown <neilb@suse.de> wrote:
>
> static ssize_t
> +md_show_scan(mddev_t *mddev, char *page)
> +{
> + char *type = "none";
> + if (mddev->recovery &
> + ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED))) {
> + if (mddev->recovery & (1<<MD_RECOVERY_SYNC)) {
> + if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
> + type = "resync";
> + else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
> + type = "check";
> + else
> + type = "repair";
> + } else
> + type = "recover";
> + }
> + return sprintf(page, "%s\n", type);
> +}
> +
> +static ssize_t
> +md_store_scan(mddev_t *mddev, const char *page, size_t len)
> +{
> + int canscan=0;
> + if (mddev->recovery &
> + ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED)))
> + return -EBUSY;
I'd be inclined to just use test_bit() here - it's pretty cheap.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH md 005 of 5] Teach raid5 the difference between 'check' and 'repair'.
2005-10-04 5:23 [PATCH md 000 of 5] Introduction NeilBrown
` (3 preceding siblings ...)
2005-10-04 5:23 ` [PATCH md 004 of 5] Allow a manual resync with md NeilBrown
@ 2005-10-04 5:23 ` NeilBrown
4 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2005-10-04 5:23 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-raid
With this, raid5 can be asked to check parity without
repairing it. It also keeps a count of the number of
incorrect parity blocks found (mismatches) and reports
them through sysfs.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/md.c | 18 ++++++++++++++++--
./drivers/md/raid5.c | 5 +++++
./include/linux/raid/md_k.h | 4 ++++
3 files changed, 25 insertions(+), 2 deletions(-)
diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~ 2005-10-04 14:19:35.000000000 +1000
+++ ./drivers/md/md.c 2005-10-04 14:42:04.000000000 +1000
@@ -1758,16 +1758,29 @@ md_store_scan(mddev_t *mddev, const char
return len;
}
+static ssize_t
+md_show_mismatch(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long) mddev->resync_mismatches);
+}
+
static struct md_sysfs_entry md_scan_mode = {
.attr = {.name = "scan_mode", .mode = S_IRUGO|S_IWUSR },
.show = md_show_scan,
.store = md_store_scan,
};
+static struct md_sysfs_entry md_mismatches = {
+ .attr = {.name = "mismatch_cnt", .mode = S_IRUGO },
+ .show = md_show_mismatch,
+};
+
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_raid_disks.attr,
&md_scan_mode.attr,
+ &md_mismatches.attr,
NULL,
};
@@ -3884,12 +3897,13 @@ static void md_do_sync(mddev_t *mddev)
}
} while (mddev->curr_resync < 2);
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/* resync follows the size requested by the personality,
* which defaults to physical size, but can be virtual size
*/
max_sectors = mddev->resync_max_sectors;
- else
+ mddev->resync_mismatches = 0;
+ } else
/* recovery follows the physical size of devices */
max_sectors = mddev->size << 1;
diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2005-10-04 13:08:44.000000000 +1000
+++ ./drivers/md/raid5.c 2005-10-04 14:42:32.000000000 +1000
@@ -1292,6 +1292,11 @@ static void handle_stripe(struct stripe_
!memcmp(pagea, pagea+4, STRIPE_SIZE-4)) {
/* parity is correct (on disc, not in buffer any more) */
set_bit(STRIPE_INSYNC, &sh->state);
+ } else {
+ conf->mddev->resync_mismatches += STRIPE_SECTORS;
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ /* don't try to repair!! */
+ set_bit(STRIPE_INSYNC, &sh->state);
}
}
if (!test_bit(STRIPE_INSYNC, &sh->state)) {
diff ./include/linux/raid/md_k.h~current~ ./include/linux/raid/md_k.h
--- ./include/linux/raid/md_k.h~current~ 2005-10-04 14:15:54.000000000 +1000
+++ ./include/linux/raid/md_k.h 2005-10-04 14:35:29.000000000 +1000
@@ -175,6 +175,10 @@ struct mddev_s
sector_t resync_mark_cnt;/* blocks written at resync_mark */
sector_t resync_max_sectors; /* may be set by personality */
+
+ sector_t resync_mismatches; /* count of sectors where
+ * parity/replica mismatch found
+ */
/* recovery/resync flags
* NEEDED: we might need to start a resync/recover
* RUNNING: a thread is running, or about to be started
^ permalink raw reply [flat|nested] 12+ messages in thread