* [RFC] embryonic RAID class
@ 2005-08-16 23:27 James Bottomley
0 siblings, 0 replies; 5+ messages in thread
From: James Bottomley @ 2005-08-16 23:27 UTC (permalink / raw)
To: SCSI Mailing List
The idea behind a RAID class is to provide a uniform interface to all
RAID subsystems (both hardware and software) in the kernel.
To do that, I've made this class a transport class that's entirely
subsystem independent (although the matching routines have to match per
subsystem, as you'll see looking at the code). I put it in the scsi
subdirectory purely because I needed somewhere to play with it, but it's
not a scsi specific module.
I used a fusion raid card as the test bed for this; with that kind of
card, this is the type of class output you get:
jejb@titanic> ls -l /sys/class/raid_devices/20\:0\:0\:0/
total 0
lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-0 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:0/20:1:0:0/
lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-1 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:1/20:1:1:0/
lrwxrwxrwx 1 root root 0 Aug 16 17:21 device -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:0:0/20:0:0:0/
-r--r--r-- 1 root root 16384 Aug 16 17:21 level
-r--r--r-- 1 root root 16384 Aug 16 17:21 resync
-r--r--r-- 1 root root 16384 Aug 16 17:21 state
So it's really simple: for a SCSI device representing a hardware raid,
it shows the raid level, the array state, the resync % complete (if the
state is resyncing) and the underlying components of the RAID (these are
exposed in fusion on the virtual channel 1).
As you can see, this type of information can be exported by almost
anything, including software raid.
The more difficult trick, of course, is going to be getting it to
perform configuration type actions with writable attributes.
James
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1,5 +1,11 @@
menu "SCSI device support"
+config RAID_ATTRS
+ tristate "RAID Transport Class"
+ default n
+ ---help---
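+ Provides a RAID transport class that exports array level, state, resync progress and component devices via sysfs.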
+
config SCSI
tristate "SCSI device support"
---help---
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -22,6 +22,8 @@ subdir-$(CONFIG_PCMCIA) += pcmcia
obj-$(CONFIG_SCSI) += scsi_mod.o
+obj-$(CONFIG_RAID_ATTRS) += raid_class.o
+
# --- NOTE ORDERING HERE ---
# For kernel non-modular link, transport attributes need to
# be initialised before drivers
diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c
new file mode 100644
--- /dev/null
+++ b/drivers/scsi/raid_class.c
@@ -0,0 +1,250 @@
+/*
+ * RAID Attributes
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/raid_class.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+
+#define RAID_NUM_ATTRS 3
+
+/*
+ * Private wrapper around the public struct raid_template returned by
+ * raid_class_attach(). It keeps the caller's function template next to the
+ * NULL-terminated attribute pointer table handed to the attribute container.
+ */
+struct raid_internal {
+ /* public part; to_raid_internal() maps a pointer to it back to the wrapper */
+ struct raid_template r;
+ /* callbacks supplied by the driver in raid_class_attach() */
+ struct raid_function_template *f;
+ /* The actual attributes */
+ /* NOTE(review): private_attrs is not referenced by the code in this file */
+ struct class_device_attribute private_attrs[RAID_NUM_ATTRS];
+ /* The array of null terminated pointers to attributes
+ * needed by scsi_sysfs.c */
+ struct class_device_attribute *attrs[RAID_NUM_ATTRS + 1];
+};
+
+/*
+ * One component of an array. Entries are allocated by raid_component_add(),
+ * linked on raid_data.component_list and freed again by raid_remove().
+ */
+struct raid_component {
+ /* linkage on raid_data.component_list */
+ struct list_head node;
+ /* the component device passed to raid_component_add() */
+ struct device *dev;
+ /* index used to build the "component-%d" sysfs link name */
+ int num;
+};
+
+/* public raid_template -> enclosing raid_internal (via member r) */
+#define to_raid_internal(tmpl) container_of(tmpl, struct raid_internal, r)
+
+/* transport_container -> raid_template (member raid_attrs) -> raid_internal */
+#define tc_to_raid_internal(tcont) ({ \
+ struct raid_template *r = \
+ container_of(tcont, struct raid_template, raid_attrs); \
+ to_raid_internal(r); \
+})
+
+/* attribute_container -> transport_container (member ac) -> raid_internal */
+#define ac_to_raid_internal(acont) ({ \
+ struct transport_container *tc = \
+ container_of(acont, struct transport_container, ac); \
+ tc_to_raid_internal(tc); \
+})
+
+/*
+ * class_device -> its attribute_container -> raid_internal.
+ * Used by the sysfs show routines to reach the driver's function template.
+ */
+#define class_device_to_raid_internal(cdev) ({ \
+ struct attribute_container *ac = \
+ attribute_container_classdev_to_container(cdev); \
+ ac_to_raid_internal(ac); \
+})
+
+
+/*
+ * Attribute container match callback: decide whether @dev should receive a
+ * raid_devices class device from the template owning @cont.
+ *
+ * Every subsystem that could house emulated RAID devices has to be probed
+ * here; only SCSI is handled so far. A SCSI device is a candidate when its
+ * host template equals the cookie stored in the function template, in which
+ * case the verdict is delegated to the template's is_raid() callback.
+ *
+ * Returns the is_raid() result for a candidate device, 0 otherwise.
+ */
+static int raid_match(struct attribute_container *cont, struct device *dev)
+{
+	struct raid_internal *ri = ac_to_raid_internal(cont);
+	struct scsi_device *sdev;
+
+	/* FIXME: look at other subsystems too */
+	if (!scsi_is_sdev_device(dev))
+		return 0;
+
+	sdev = to_scsi_device(dev);
+	if (ri->f->cookie == sdev->host->hostt)
+		return ri->f->is_raid(dev);
+
+	return 0;
+}
+
+/*
+ * Transport class setup callback: attach a zeroed raid_data with an empty
+ * component list to the new class device.
+ *
+ * The class device must not carry private data yet (enforced by BUG_ON).
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int raid_setup(struct transport_container *tc, struct device *dev,
+		      struct class_device *cdev)
+{
+	struct raid_data *data;
+
+	BUG_ON(class_get_devdata(cdev));
+
+	data = kmalloc(sizeof(struct raid_data), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	memset(data, 0, sizeof(struct raid_data));
+	INIT_LIST_HEAD(&data->component_list);
+	class_set_devdata(cdev, data);
+
+	return 0;
+}
+
+/*
+ * Transport class remove callback: tear down everything raid_setup() and
+ * raid_component_add() attached to the class device.
+ *
+ * Each recorded component has its "component-%d" sysfs link removed and its
+ * list entry freed; finally the raid_data itself is released.
+ *
+ * Always returns 0.
+ */
+static int raid_remove(struct transport_container *tc, struct device *dev,
+		       struct class_device *cdev)
+{
+	struct raid_data *rd = class_get_devdata(cdev);
+	struct raid_component *rc, *next;
+
+	/* no private data attached, so there is nothing to undo */
+	if (!rd)
+		return 0;
+
+	class_set_devdata(cdev, NULL);
+	list_for_each_entry_safe(rc, next, &rd->component_list, node) {
+		char buf[40];
+
+		snprintf(buf, sizeof(buf), "component-%d", rc->num);
+		list_del(&rc->node);
+		sysfs_remove_link(&cdev->kobj, buf);
+		kfree(rc);
+	}
+	/*
+	 * Free through the saved pointer: the devdata slot was cleared above,
+	 * so re-reading it would hand NULL to kfree() and leak the raid_data.
+	 */
+	kfree(rd);
+	return 0;
+}
+
+/*
+ * The transport class behind /sys/class/raid_devices. raid_setup and
+ * raid_remove are passed as its setup and remove callbacks; the final
+ * callback slot is left NULL (presumably the configure hook; confirm
+ * against DECLARE_TRANSPORT_CLASS in <linux/transport_class.h>).
+ */
+static DECLARE_TRANSPORT_CLASS(raid_class,
+ "raid_devices",
+ raid_setup,
+ raid_remove,
+ NULL);
+
+/*
+ * Maps each enum raid_state value to the string shown in the sysfs "state"
+ * attribute; searched linearly by raid_state_name().
+ */
+static struct {
+ enum raid_state value;
+ char *name;
+} raid_states[] = {
+ { RAID_ACTIVE, "active" },
+ { RAID_DEGRADED, "degraded" },
+ { RAID_RESYNCING, "resyncing" },
+ { RAID_OFFLINE, "offline" },
+};
+
+/*
+ * Look up the printable name of @state in the raid_states table.
+ *
+ * Returns the matching name, or NULL when @state has no table entry.
+ */
+static const char *raid_state_name(enum raid_state state)
+{
+	int idx = 0;
+
+	while (idx < sizeof(raid_states)/sizeof(raid_states[0])) {
+		if (raid_states[idx].value == state)
+			return raid_states[idx].name;
+		idx++;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Generates raid_show_<attr>(), a class device show routine.
+ * The routine fetches the raid_data from the class device, runs the
+ * caller-supplied statement fragment `code` (which may be empty), then
+ * prints `var` using format `fmt` followed by a newline. Output is capped
+ * at 20 bytes by the snprintf() size argument.
+ */
+#define raid_attr_show_internal(attr, fmt, var, code) \
+static ssize_t raid_show_##attr(struct class_device *cdev, char *buf) \
+{ \
+ struct raid_data *rd = class_get_devdata(cdev); \
+ code \
+ return snprintf(buf, 20, #fmt "\n", var); \
+}
+
+/*
+ * Read-only attribute whose value is an enum printed by name.
+ * After running `code`, the show routine translates rd-><attr> with
+ * raid_<states>_name() and prints the resulting string with %s.
+ * Also defines the matching world-readable class device attribute.
+ */
+#define raid_attr_ro_states(attr, states, code) \
+raid_attr_show_internal(attr, %s, name, \
+ const char *name; \
+ code \
+ name = raid_##states##_name(rd->attr); \
+) \
+static CLASS_DEVICE_ATTR(attr, S_IRUGO, raid_show_##attr, NULL)
+
+
+/*
+ * Read-only integer attribute: the show routine runs `code` and prints
+ * rd-><attr> with %d. Also defines the world-readable class device attribute.
+ */
+#define raid_attr_ro_internal(attr, code) \
+raid_attr_show_internal(attr, %d, rd->attr, code) \
+static CLASS_DEVICE_ATTR(attr, S_IRUGO, raid_show_##attr, NULL)
+
+/*
+ * Statement fragment spliced into a show routine: if the driver's function
+ * template provides get_<attr>, call it with cdev->dev before the value is
+ * read from raid_data.
+ * NOTE(review): presumably the callback refreshes the cached value through
+ * raid_set_<attr>(); confirm against the drivers that implement it.
+ */
+#define ATTR_CODE(attr) \
+ struct raid_internal *i = class_device_to_raid_internal(cdev); \
+ if (i->f->get_##attr) \
+ i->f->get_##attr(cdev->dev);
+
+/* integer attribute printed straight from raid_data, no driver callback */
+#define raid_attr_ro(attr) raid_attr_ro_internal(attr, )
+/* integer attribute; the driver's get_<attr> hook runs first if present */
+#define raid_attr_ro_fn(attr) raid_attr_ro_internal(attr, ATTR_CODE(attr))
+/* enum attribute printed by name; get_<attr> hook runs first if present */
+#define raid_attr_ro_state(attr) raid_attr_ro_states(attr, attr, ATTR_CODE(attr))
+
+/* the three sysfs files exported per array: level, resync and state */
+raid_attr_ro(level);
+raid_attr_ro_fn(resync);
+raid_attr_ro_state(state);
+
+/*
+ * Record @component_dev as a member of the array represented by @raid_dev
+ * and expose it as a "component-<n>" symlink in the array's class device
+ * directory.
+ *
+ * @r:             template returned by raid_class_attach()
+ * @raid_dev:      device representing the array
+ * @component_dev: device to link as a component
+ *
+ * The function is best effort and reports nothing to the caller: it does
+ * nothing if @raid_dev has no class device or private data under @r, if
+ * memory cannot be allocated, or if the sysfs link cannot be created. A
+ * component index is only consumed once the link exists, so the entries
+ * raid_remove() walks always correspond to real links.
+ */
+void raid_component_add(struct raid_template *r,struct device *raid_dev,
+			struct device *component_dev)
+{
+	struct class_device *cdev =
+		attribute_container_find_class_device(&r->raid_attrs.ac,
+						      raid_dev);
+	struct raid_component *rc;
+	struct raid_data *rd;
+	char buf[40];
+
+	/* the lookup yields NULL when raid_dev was never matched */
+	if (!cdev)
+		return;
+
+	rd = class_get_devdata(cdev);
+	if (!rd)
+		return;
+
+	rc = kmalloc(sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return;
+
+	INIT_LIST_HEAD(&rc->node);
+	rc->dev = component_dev;
+	rc->num = rd->component_count;
+
+	snprintf(buf, sizeof(buf), "component-%d", rc->num);
+	if (sysfs_create_link(&cdev->kobj, &component_dev->kobj, buf)) {
+		/* no link, so do not track a component that is not visible */
+		kfree(rc);
+		return;
+	}
+
+	rd->component_count++;
+	list_add_tail(&rc->node, &rd->component_list);
+}
+EXPORT_SYMBOL(raid_component_add);
+
+/*
+ * Create and register a RAID template for a driver.
+ *
+ * @ft: the driver's function template (match cookie, is_raid() and the
+ *      optional get_resync()/get_state() hooks); only the pointer is
+ *      stored, so it must stay valid until raid_class_release().
+ *
+ * The attribute table is filled in completely before the container is
+ * registered, so a device matched right after registration already sees
+ * the level, resync and state attributes.
+ *
+ * Returns the public template on success, or NULL if memory allocation or
+ * container registration fails. Release with raid_class_release().
+ */
+struct raid_template *
+raid_class_attach(struct raid_function_template *ft)
+{
+	struct raid_internal *i = kmalloc(sizeof(struct raid_internal),
+					  GFP_KERNEL);
+	int count = 0;
+
+	if (unlikely(!i))
+		return NULL;
+
+	memset(i, 0, sizeof(*i));
+
+	i->f = ft;
+
+	i->attrs[count++] = &class_device_attr_level;
+	i->attrs[count++] = &class_device_attr_resync;
+	i->attrs[count++] = &class_device_attr_state;
+
+	/* check the bound before writing the terminator slot */
+	BUG_ON(count > RAID_NUM_ATTRS);
+	i->attrs[count] = NULL;
+
+	i->r.raid_attrs.ac.class = &raid_class.class;
+	i->r.raid_attrs.ac.match = raid_match;
+	i->r.raid_attrs.ac.attrs = &i->attrs[0];
+
+	if (attribute_container_register(&i->r.raid_attrs.ac)) {
+		kfree(i);
+		return NULL;
+	}
+
+	return &i->r;
+}
+EXPORT_SYMBOL(raid_class_attach);
+
+/*
+ * Unregister and free a template obtained from raid_class_attach().
+ *
+ * @r: the template to release
+ *
+ * If the attribute container cannot be unregistered (non-zero return from
+ * attribute_container_unregister()), the template is deliberately left
+ * allocated: the container core still holds pointers into it, so freeing
+ * it here would leave them dangling.
+ */
+void
+raid_class_release(struct raid_template *r)
+{
+	struct raid_internal *i = to_raid_internal(r);
+
+	if (attribute_container_unregister(&i->r.raid_attrs.ac))
+		return;
+
+	kfree(i);
+}
+EXPORT_SYMBOL(raid_class_release);
+
+/* Module init: register the raid_devices transport class. */
+static __init int raid_init(void)
+{
+	int err = transport_class_register(&raid_class);
+
+	return err;
+}
+
+/* Module exit: unregister the raid_devices transport class again. */
+static __exit void raid_exit(void)
+{
+	transport_class_unregister(&raid_class);
+}
+
+/* module metadata and entry points */
+MODULE_AUTHOR("James Bottomley");
+MODULE_DESCRIPTION("RAID device class");
+MODULE_LICENSE("GPL");
+
+module_init(raid_init);
+module_exit(raid_exit);
+
diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h
new file mode 100644
--- /dev/null
+++ b/include/linux/raid_class.h
@@ -0,0 +1,59 @@
+/*
+ */
+#include <linux/transport_class.h>
+
+/*
+ * Opaque handle returned by raid_class_attach(); drivers pass it back to
+ * the raid_set_*(), raid_get_*() and raid_component_add() helpers.
+ */
+struct raid_template {
+ /* transport container through which the class devices are found */
+ struct transport_container raid_attrs;
+};
+
+/* Driver-supplied hooks handed to raid_class_attach(). */
+struct raid_function_template {
+ /* compared against the SCSI host template when matching devices */
+ void *cookie;
+ /* called for a matching device; non-zero means "this is a RAID array" */
+ int (*is_raid)(struct device *);
+ /* optional: called before the "resync" attribute is shown */
+ void (*get_resync)(struct device *);
+ /* optional: called before the "state" attribute is shown */
+ void (*get_state)(struct device *);
+};
+
+/*
+ * Array states reported through the "state" attribute. Numbering starts at
+ * 1, so the zero value found in a freshly zeroed raid_data is not a valid
+ * state.
+ */
+enum raid_state {
+ RAID_ACTIVE = 1,
+ RAID_DEGRADED,
+ RAID_RESYNCING,
+ RAID_OFFLINE,
+};
+
+/* Per-array data stored as the class device's private data. */
+struct raid_data {
+ /* list of struct raid_component entries */
+ struct list_head component_list;
+ /* number of components added so far; supplies the next component index */
+ int component_count;
+ /* RAID level, shown by the "level" attribute */
+ int level;
+ /* current array state, shown by name in the "state" attribute */
+ enum raid_state state;
+ /* value shown by the "resync" attribute (resync % complete per the RFC text) */
+ int resync;
+};
+
+/*
+ * Generates a pair of inline accessors for one raid_data field:
+ *   raid_set_<attr>(r, dev, value) stores value in the field,
+ *   raid_get_<attr>(r, dev) returns the field.
+ * Both look up the class device for @dev in the template's attribute
+ * container and BUG() if none is found. The private data pointer itself
+ * is not checked before it is dereferenced.
+ */
+#define DEFINE_RAID_ATTRIBUTE(type, attr) \
+static inline void \
+raid_set_##attr(struct raid_template *r, struct device *dev, type value) { \
+ struct class_device *cdev = \
+ attribute_container_find_class_device(&r->raid_attrs.ac, dev);\
+ struct raid_data *rd; \
+ BUG_ON(!cdev); \
+ rd = class_get_devdata(cdev); \
+ rd->attr = value; \
+} \
+static inline type \
+raid_get_##attr(struct raid_template *r, struct device *dev) { \
+ struct class_device *cdev = \
+ attribute_container_find_class_device(&r->raid_attrs.ac, dev);\
+ struct raid_data *rd; \
+ BUG_ON(!cdev); \
+ rd = class_get_devdata(cdev); \
+ return rd->attr; \
+}
+
+/* raid_set_/raid_get_ accessors for level, resync and state */
+DEFINE_RAID_ATTRIBUTE(int, level)
+DEFINE_RAID_ATTRIBUTE(int, resync)
+DEFINE_RAID_ATTRIBUTE(enum raid_state, state)
+
+/* create a template from a driver's function template; NULL on failure */
+struct raid_template *raid_class_attach(struct raid_function_template *);
+/* unregister and free a template obtained from raid_class_attach() */
+void raid_class_release(struct raid_template *);
+
+/* arguments: template, array device, component device to link under it */
+void raid_component_add(struct raid_template *, struct device *,
+ struct device *);
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [RFC] embryonic RAID class
@ 2005-08-17 20:25 Salyzyn, Mark
2005-08-17 22:45 ` James Bottomley
0 siblings, 1 reply; 5+ messages in thread
From: Salyzyn, Mark @ 2005-08-17 20:25 UTC (permalink / raw)
To: James Bottomley, SCSI Mailing List
Under aac based adapters the build task reports the task number, current
array water mark and end of array but not the array number associated
with the build task in the AIF events returning from the Firmware. The
management applications generally issue a series of enumerations to the
devices and the arrays to build a set of objects, one of these object
attributes is the build task associated with the array (if it is
rebuilding).
Really simple to report the percentage completed as the driver already
sniffs the AIFs to generate hot-add and hot-remove actions, much more
difficult and quite possibly driver bloating to associate that build
task status with an array or underlying targets.
Compromise? Well, what about exporting a raid adapter class and having
'resync' associated with that adapter object? Array creation will no
doubt have to go to the raid adapter class in any case, so it will be a
requirement if configuration is to be allowed in this RAID class.
Add hotspare-0, hotspare-1 ... to the list?
A target id written to resync would be used to 'suck in' a hotspare or
trigger a rebuild to a failed target?
Sincerely -- Mark Salyzyn
-----Original Message-----
From: linux-scsi-owner@vger.kernel.org
[mailto:linux-scsi-owner@vger.kernel.org] On Behalf Of James Bottomley
Sent: Tuesday, August 16, 2005 7:28 PM
To: SCSI Mailing List
Subject: [RFC] embryonic RAID class
The idea behind a RAID class is to provide a uniform interface to all
RAID subsystems (both hardware and software) in the kernel.
To do that, I've made this class a transport class that's entirely
subsystem independent (although the matching routines have to match per
subsystem, as you'll see looking at the code). I put it in the scsi
subdirectory purely because I needed somewhere to play with it, but it's
not a scsi specific module.
I used a fusion raid card as the test bed for this; with that kind of
card, this is the type of class output you get:
jejb@titanic> ls -l /sys/class/raid_devices/20\:0\:0\:0/
total 0
lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-0 ->
../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:0/20:1:0:0/
lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-1 ->
../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:1/20:1:1:0/
lrwxrwxrwx 1 root root 0 Aug 16 17:21 device ->
../../../devices/pci0000:80/0000:80:04.0/host20/target20:0:0/20:0:0:0/
-r--r--r-- 1 root root 16384 Aug 16 17:21 level
-r--r--r-- 1 root root 16384 Aug 16 17:21 resync
-r--r--r-- 1 root root 16384 Aug 16 17:21 state
So it's really simple: for a SCSI device representing a hardware raid,
it shows the raid level, the array state, the resync % complete (if the
state is resyncing) and the underlying components of the RAID (these are
exposed in fusion on the virtual channel 1).
As you can see, this type of information can be exported by almost
anything, including software raid.
The more difficult trick, of course, is going to be getting it to
perform configuration type actions with writable attributes.
James
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [RFC] embryonic RAID class
2005-08-17 20:25 [RFC] embryonic RAID class Salyzyn, Mark
@ 2005-08-17 22:45 ` James Bottomley
2005-08-18 12:44 ` Matt Domsch
2005-08-18 17:16 ` Luben Tuikov
0 siblings, 2 replies; 5+ messages in thread
From: James Bottomley @ 2005-08-17 22:45 UTC (permalink / raw)
To: Salyzyn, Mark; +Cc: SCSI Mailing List
On Wed, 2005-08-17 at 16:25 -0400, Salyzyn, Mark wrote:
> Under aac based adapters the build task reports the task number, current
> array water mark and end of array but not the array number associated
> with the build task in the AIF events returning from the Firmware. The
> management applications generally issue a series of enumerations to the
> devices and the arrays to build a set of objects, one of these object
> attributes is the build task associated with the array (if it is
> rebuilding).
>
> Really simple to report the percentage completed as the driver already
> sniffs the AIFs to generate hot-add and hot-remove actions, much more
> difficult and quite possibly driver bloating to associate that build
> task status with an array or underlying targets.
Yes, fusion also necessitates some digging to get the correct
information.
> Compromise? Well, what about exporting a raid adapter class and having
> 'resync' associated with that adapter object? Array creation will no
> doubt have to go to the raid adapter class in any case, so it will be a
> requirement if configuration is to be allowed in this RAID class.
But isn't that what you currently have? The HBA exports a get_resync()
method via the function templates which the raid class makes use of.
I admit that the whole thing will get hugely complex if configuration
becomes allowable via this class, but at the moment I'm just trying to
get at useful information display ... configuration can come later.
> Add hotspare-0, hotspare-1 ... to the list?
Yes ... I only have two disks though, so I've reached the limit of what
I can do with my current fusion setup. The whole of the component
display has to be reworked anyway (I only provide add, but obviously
delete should be allowed as well as component type). I'll add it to the
list. Probably we also need a component state model as well (and
perhaps spare would be one of the states).
> A target id written to resync would be used to 'suck in' a hotspare or
> trigger a rebuild to a failed target?
Actually, probably do that via an echo to the state. The user would be
allowed to kick certain state transitions, and degraded->resyncing
should probably be one of them.
James
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC] embryonic RAID class
2005-08-17 22:45 ` James Bottomley
@ 2005-08-18 12:44 ` Matt Domsch
2005-08-18 17:16 ` Luben Tuikov
1 sibling, 0 replies; 5+ messages in thread
From: Matt Domsch @ 2005-08-18 12:44 UTC (permalink / raw)
To: James Bottomley; +Cc: Salyzyn, Mark, SCSI Mailing List
On Wed, Aug 17, 2005 at 06:45:25PM -0400, James Bottomley wrote:
> > Add hotspare-0, hotspare-1 ... to the list?
>
> Yes ... I only have two disks though, so I've reached the limit of what
> I can do with my current fusion setup. The whole of the component
> display has to be reworked anyway (I only provide add, but obviously
> delete should be allowed as well as component type). I'll add it to the
> list. Probably we also need a component state model as well (and
> perhaps spare would be one of the states).
Different controllers have different hot spare capabilities. Some hot
spares are global to the whole array and can fill in if any disk
fails. Others are tied to either a group of logical volumes, or to a
single logical volume. The first and third associations are easy, the
second association will require a new group concept.
Thanks,
Matt
--
Matt Domsch
Software Architect
Dell Linux Solutions linux.dell.com & www.dell.com/linux
Linux on Dell mailing lists @ http://lists.us.dell.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC] embryonic RAID class
2005-08-17 22:45 ` James Bottomley
2005-08-18 12:44 ` Matt Domsch
@ 2005-08-18 17:16 ` Luben Tuikov
1 sibling, 0 replies; 5+ messages in thread
From: Luben Tuikov @ 2005-08-18 17:16 UTC (permalink / raw)
To: James Bottomley; +Cc: Salyzyn, Mark, SCSI Mailing List
On 08/17/05 18:45, James Bottomley wrote:
> I admit that the whole thing will get hugely complex if configuration
> becomes allowable via this class, but at the moment I'm just trying to
> get at useful information display ... configuration can come later.
James, are you saying that you do not have a _vision_ of how the
*whole thing* would look?
Luben
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2005-08-18 19:34 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-08-17 20:25 [RFC] embryonic RAID class Salyzyn, Mark
2005-08-17 22:45 ` James Bottomley
2005-08-18 12:44 ` Matt Domsch
2005-08-18 17:16 ` Luben Tuikov
-- strict thread matches above, loose matches on Subject: below --
2005-08-16 23:27 James Bottomley
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).