* [PATCH] 2.4.18-26.7.x /proc/mdstat seq_file
@ 2003-03-11 20:56 Rechenberg, Andrew
2003-03-13 1:03 ` Neil Brown
0 siblings, 1 reply; 3+ messages in thread
From: Rechenberg, Andrew @ 2003-03-11 20:56 UTC (permalink / raw)
To: linux-raid
[-- Attachment #1: Type: text/plain, Size: 1375 bytes --]
I figured I would follow proper protocol and put PATCH in the subject
line. As stated in my previous email, this patch fixes my problem of
kernel OOPS with a large number of software RAID arrays (I have 27).
Below is the text from the original mail.
-------------------------------------------
With the help of Martin Bligh, Kevin Fleming, and Randy Dunlap, it looks
like this problem is related to the large size of the information
presented in /proc/mdstat and it overflowing the 4k page boundary.
With Kevin's patch from last week and some help from Randy, I patched
the md code in Red Hat 2.4.18-26.7.x to use the seq_file interface for
mdstat. I've attached the patch. As with Kevin's patch, it touches
almost everything in drivers/md, as well as adding the necessary methods
to fs/seq_file.c and include/linux/seq_file.h.
I'm currently testing raid1 and raid0 and it seems to work well. No
panics yet!!! :) I currently have 26 RAID1 arrays and a big RAID0
stripe across that and I'm running some I/O tests on it now to make sure
that it is stable.
I haven't tested the raid5, linear, or multipath code, so someone might
want to test that out before using it in production :)
As Kevin indicated in his mail, I can post the patch to a web site if
attachments are a problem.
Thanks to everyone for their help.
Regards,
Andy.
[-- Attachment #2: md-seq_file-2.4.18-26.7.x.patch --]
[-- Type: application/octet-stream, Size: 14646 bytes --]
--- ../linux-2.4.18-26.7.x/drivers/md/linear.c Sun Sep 30 15:26:06 2001
+++ ./drivers/md/linear.c Tue Mar 11 11:09:04 2003
@@ -22,6 +22,7 @@
#include <linux/slab.h>
#include <linux/raid/linear.h>
+#include <linux/seq_file.h>
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
@@ -153,31 +154,29 @@
return 1;
}
-static int linear_status (char *page, mddev_t *mddev)
+static void linear_status (struct seq_file *seq, mddev_t *mddev)
{
- int sz = 0;
#undef MD_DEBUG
#ifdef MD_DEBUG
int j;
linear_conf_t *conf = mddev_to_conf(mddev);
- sz += sprintf(page+sz, " ");
+ seq_printf(seq, " ");
for (j = 0; j < conf->nr_zones; j++)
{
- sz += sprintf(page+sz, "[%s",
+ seq_printf(seq, "[%s",
partition_name(conf->hash_table[j].dev0->dev));
if (conf->hash_table[j].dev1)
- sz += sprintf(page+sz, "/%s] ",
+ seq_printf(seq, "/%s] ",
partition_name(conf->hash_table[j].dev1->dev));
else
- sz += sprintf(page+sz, "] ");
+ seq_printf(seq, "] ");
}
- sz += sprintf(page+sz, "\n");
+ seq_printf(seq, "\n");
#endif
- sz += sprintf(page+sz, " %dk rounding", mddev->param.chunk_size/1024);
- return sz;
+ seq_printf(seq, " %dk rounding", mddev->param.chunk_size/1024);
}
--- ../linux-2.4.18-26.7.x/drivers/md/md.c Mon Feb 24 09:15:39 2003
+++ ./drivers/md/md.c Tue Mar 11 12:25:46 2003
@@ -36,6 +36,8 @@
#include <linux/devfs_fs_kernel.h>
#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
@@ -128,6 +130,15 @@
fops: &md_fops,
};
+static int md_state_open_fs(struct inode *inode, struct file *file);
+
+static struct file_operations md_state_fops = {
+ open: md_state_open_fs,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: single_release,
+};
+
/*
* Enables to iterate over all existing md arrays
*/
@@ -3070,13 +3081,13 @@
return 0;
}
-static int status_unused(char * page)
+static void status_unused(struct seq_file * seq)
{
- int sz = 0, i = 0;
+ int i = 0;
mdk_rdev_t *rdev;
struct md_list_head *tmp;
- sz += sprintf(page + sz, "unused devices: ");
+ seq_printf(seq, "unused devices: ");
ITERATE_RDEV_ALL(rdev,tmp) {
if (!rdev->same_set.next && !rdev->same_set.prev) {
@@ -3084,21 +3095,19 @@
* The device is not yet used by any array.
*/
i++;
- sz += sprintf(page + sz, "%s ",
+ seq_printf(seq, "%s ",
partition_name(rdev->dev));
}
}
if (!i)
- sz += sprintf(page + sz, "<none>");
+ seq_printf(seq, "<none>");
- sz += sprintf(page + sz, "\n");
- return sz;
+ seq_printf(seq, "\n");
}
-static int status_resync(char * page, mddev_t * mddev)
+static void status_resync(struct seq_file * seq, mddev_t * mddev)
{
- int sz = 0;
unsigned long max_blocks, resync, res, dt, db, rt;
resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
@@ -3109,30 +3118,29 @@
*/
if (!max_blocks) {
MD_BUG();
- return 0;
}
res = (resync/1024)*1000/(max_blocks/1024 + 1);
{
int i, x = res/50, y = 20-x;
- sz += sprintf(page + sz, "[");
+ seq_printf(seq, "[");
for (i = 0; i < x; i++)
- sz += sprintf(page + sz, "=");
- sz += sprintf(page + sz, ">");
+ seq_printf(seq, "=");
+ seq_printf(seq, ">");
for (i = 0; i < y; i++)
- sz += sprintf(page + sz, ".");
- sz += sprintf(page + sz, "] ");
+ seq_printf(seq, ".");
+ seq_printf(seq, "] ");
}
if (!mddev->recovery_running)
/*
* true resync
*/
- sz += sprintf(page + sz, " resync =%3lu.%lu%% (%lu/%lu)",
+ seq_printf(seq, " resync =%3lu.%lu%% (%lu/%lu)",
res/10, res % 10, resync, max_blocks);
else
/*
* recovery ...
*/
- sz += sprintf(page + sz, " recovery =%3lu.%lu%% (%lu/%lu)",
+ seq_printf(seq, " recovery =%3lu.%lu%% (%lu/%lu)",
res/10, res % 10, resync, max_blocks);
/*
@@ -3149,50 +3157,47 @@
db = resync - (mddev->resync_mark_cnt/2);
rt = (dt * ((max_blocks-resync) / (db/100+1)))/100;
- sz += sprintf(page + sz, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
+ seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
- sz += sprintf(page + sz, " speed=%ldK/sec", db/dt);
-
- return sz;
+ seq_printf(seq, " speed=%ldK/sec", db/dt);
}
-static int md_status_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int md_state_seq_show(struct seq_file *seq, void *dummy)
{
- int sz = 0, j, size;
+ int j, size;
struct md_list_head *tmp, *tmp2;
mdk_rdev_t *rdev;
mddev_t *mddev;
- sz += sprintf(page + sz, "Personalities : ");
+ seq_printf(seq, "Personalities : ");
for (j = 0; j < MAX_PERSONALITY; j++)
if (pers[j])
- sz += sprintf(page+sz, "[%s] ", pers[j]->name);
+ seq_printf(seq, "[%s] ", pers[j]->name);
- sz += sprintf(page+sz, "\n");
+ seq_printf(seq, "\n");
- sz += sprintf(page+sz, "read_ahead ");
+ seq_printf(seq, "read_ahead ");
if (read_ahead[MD_MAJOR] == INT_MAX)
- sz += sprintf(page+sz, "not set\n");
+ seq_printf(seq, "not set\n");
else
- sz += sprintf(page+sz, "%d sectors\n", read_ahead[MD_MAJOR]);
+ seq_printf(seq, "%d sectors\n", read_ahead[MD_MAJOR]);
ITERATE_MDDEV(mddev,tmp) {
- sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev),
+ seq_printf(seq, "md%d : %sactive", mdidx(mddev),
mddev->pers ? "" : "in");
if (mddev->pers) {
if (mddev->ro)
- sz += sprintf(page + sz, " (read-only)");
- sz += sprintf(page + sz, " %s", mddev->pers->name);
+ seq_printf(seq, " (read-only)");
+ seq_printf(seq, " %s", mddev->pers->name);
}
size = 0;
ITERATE_RDEV(mddev,rdev,tmp2) {
- sz += sprintf(page + sz, " %s[%d]",
+ seq_printf(seq, " %s[%d]",
partition_name(rdev->dev), rdev->desc_nr);
if (rdev->faulty) {
- sz += sprintf(page + sz, "(F)");
+ seq_printf(seq, "(F)");
continue;
}
size += rdev->size;
@@ -3200,33 +3205,40 @@
if (mddev->nb_dev) {
if (mddev->pers)
- sz += sprintf(page + sz, "\n %d blocks",
+ seq_printf(seq, "\n %d blocks",
md_size[mdidx(mddev)]);
else
- sz += sprintf(page + sz, "\n %d blocks", size);
+ seq_printf(seq, "\n %d blocks", size);
}
if (!mddev->pers) {
- sz += sprintf(page+sz, "\n");
+ seq_printf(seq, "\n");
continue;
}
- sz += mddev->pers->status (page+sz, mddev);
+ mddev->pers->status (seq, mddev);
- sz += sprintf(page+sz, "\n ");
+ seq_printf(seq, "\n ");
if (mddev->curr_resync) {
- sz += status_resync (page+sz, mddev);
+ status_resync (seq, mddev);
} else {
if (md_atomic_read(&mddev->resync_sem.count) != 1)
- sz += sprintf(page + sz, " resync=DELAYED");
+ seq_printf(seq, " resync=DELAYED");
}
- sz += sprintf(page + sz, "\n");
+ seq_printf(seq, "\n");
}
- sz += status_unused(page + sz);
+ status_unused(seq);
- return sz;
+ return 0;
}
+
+static int md_state_open_fs(struct inode *inode, struct file *file)
+{
+ return single_open(file, md_state_seq_show, NULL);
+}
+
+
int register_md_personality(int pnum, mdk_personality_t *p)
{
if (pnum >= MAX_PERSONALITY) {
@@ -3633,15 +3645,13 @@
dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
-#ifdef CONFIG_PROC_FS
- create_proc_read_entry("mdstat", 0, NULL, md_status_read_proc, NULL);
-#endif
}
int md__init md_init(void)
{
static char * name = "mdrecoveryd";
int minor;
+ struct proc_dir_entry * entry = NULL;
printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d, MD_SB_DISKS=%d\n",
MD_MAJOR_VERSION, MD_MINOR_VERSION,
@@ -3678,6 +3688,13 @@
raid_table_header = register_sysctl_table(raid_root_table, 1);
md_geninit();
+ entry = create_proc_entry("mdstat", S_IRUGO, NULL);
+ if (!entry)
+ printk(KERN_ALERT
+ "md: bug: couldn't create /proc/mdstat\n");
+ else {
+ entry->proc_fops = &md_state_fops;
+ }
return (0);
}
@@ -4005,9 +4022,7 @@
devfs_unregister_blkdev(MAJOR_NR,"md");
unregister_reboot_notifier(&md_notifier);
unregister_sysctl_table(raid_table_header);
-#ifdef CONFIG_PROC_FS
remove_proc_entry("mdstat", NULL);
-#endif
del_gendisk(&md_gendisk);
--- ../linux-2.4.18-26.7.x/drivers/md/multipath.c Mon Feb 25 14:37:58 2002
+++ ./drivers/md/multipath.c Tue Mar 11 11:09:04 2003
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/raid/multipath.h>
#include <asm/atomic.h>
+#include <linux/seq_file.h>
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
@@ -281,18 +282,17 @@
return 0;
}
-static int multipath_status (char *page, mddev_t *mddev)
+static void multipath_status (struct seq_file *seq, mddev_t *mddev)
{
multipath_conf_t *conf = mddev_to_conf(mddev);
- int sz = 0, i;
+ int i;
- sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks,
+ seq_printf (seq, " [%d/%d] [", conf->raid_disks,
conf->working_disks);
for (i = 0; i < conf->raid_disks; i++)
- sz += sprintf (page+sz, "%s",
+ seq_printf (seq, "%s",
conf->multipaths[i].operational ? "U" : "_");
- sz += sprintf (page+sz, "]");
- return sz;
+ seq_printf (seq, "]");
}
#define LAST_DISK KERN_ALERT \
--- ../linux-2.4.18-26.7.x/drivers/md/raid0.c Sun Sep 30 15:26:06 2001
+++ ./drivers/md/raid0.c Tue Mar 11 11:09:04 2003
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/raid/raid0.h>
+#include <linux/seq_file.h>
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
@@ -289,41 +290,38 @@
return 0;
}
-static int raid0_status (char *page, mddev_t *mddev)
+static void raid0_status (struct seq_file *seq, mddev_t *mddev)
{
- int sz = 0;
#undef MD_DEBUG
#ifdef MD_DEBUG
int j, k;
raid0_conf_t *conf = mddev_to_conf(mddev);
- sz += sprintf(page + sz, " ");
+ seq_printf(seq, " ");
for (j = 0; j < conf->nr_zones; j++) {
- sz += sprintf(page + sz, "[z%d",
+ seq_printf(seq, "[z%d",
conf->hash_table[j].zone0 - conf->strip_zone);
if (conf->hash_table[j].zone1)
- sz += sprintf(page+sz, "/z%d] ",
+ seq_printf(seq, "/z%d] ",
conf->hash_table[j].zone1 - conf->strip_zone);
else
- sz += sprintf(page+sz, "] ");
+ seq_printf(seq, "] ");
}
- sz += sprintf(page + sz, "\n");
+ seq_printf(seq, "\n");
for (j = 0; j < conf->nr_strip_zones; j++) {
- sz += sprintf(page + sz, " z%d=[", j);
+ seq_printf(seq, " z%d=[", j);
for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
- sz += sprintf (page+sz, "%s/", partition_name(
+ seq_printf (seq, "%s/", partition_name(
conf->strip_zone[j].dev[k]->dev));
- sz--;
- sz += sprintf (page+sz, "] zo=%d do=%d s=%d\n",
+ seq_printf (seq, "] zo=%d do=%d s=%d\n",
conf->strip_zone[j].zone_offset,
conf->strip_zone[j].dev_offset,
conf->strip_zone[j].size);
}
#endif
- sz += sprintf(page + sz, " %dk chunks", mddev->param.chunk_size/1024);
- return sz;
+ seq_printf(seq, " %dk chunks", mddev->param.chunk_size/1024);
}
static mdk_personality_t raid0_personality=
--- ../linux-2.4.18-26.7.x/drivers/md/raid1.c Mon Feb 24 09:15:28 2003
+++ ./drivers/md/raid1.c Tue Mar 11 11:09:05 2003
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/raid/raid1.h>
#include <asm/atomic.h>
+#include <linux/seq_file.h>
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
@@ -714,18 +715,17 @@
return (0);
}
-static int raid1_status (char *page, mddev_t *mddev)
+static void raid1_status (struct seq_file *seq, mddev_t *mddev)
{
raid1_conf_t *conf = mddev_to_conf(mddev);
- int sz = 0, i;
+ int i;
- sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks,
+ seq_printf (seq, " [%d/%d] [", conf->raid_disks,
conf->working_disks);
for (i = 0; i < conf->raid_disks; i++)
- sz += sprintf (page+sz, "%s",
+ seq_printf (seq, "%s",
conf->mirrors[i].operational ? "U" : "_");
- sz += sprintf (page+sz, "]");
- return sz;
+ seq_printf (seq, "]");
}
#define LAST_DISK KERN_ALERT \
--- ../linux-2.4.18-26.7.x/drivers/md/raid5.c Mon Feb 24 09:15:36 2003
+++ ./drivers/md/raid5.c Tue Mar 11 11:09:05 2003
@@ -23,6 +23,7 @@
#include <linux/raid/raid5.h>
#include <asm/bitops.h>
#include <asm/atomic.h>
+#include <linux/seq_file.h>
static mdk_personality_t raid5_personality;
@@ -1681,23 +1682,22 @@
}
#endif
-static int raid5_status (char *page, mddev_t *mddev)
+static void raid5_status (struct seq_file *seq, mddev_t *mddev)
{
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
mdp_super_t *sb = mddev->sb;
- int sz = 0, i;
+ int i;
- sz += sprintf (page+sz, " level %d, %dk chunk, algorithm %d", sb->level, sb->chunk_size >> 10, sb->layout);
- sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks, conf->working_disks);
+ seq_printf (seq, " level %d, %dk chunk, algorithm %d", sb->level, sb->chunk_size >> 10, sb->layout);
+ seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->working_disks);
for (i = 0; i < conf->raid_disks; i++)
- sz += sprintf (page+sz, "%s", conf->disks[i].operational ? "U" : "_");
- sz += sprintf (page+sz, "]");
+ seq_printf (seq, "%s", conf->disks[i].operational ? "U" : "_");
+ seq_printf (seq, "]");
#if RAID5_DEBUG
#define D(x) \
- sz += sprintf (page+sz, "<"#x":%d>", atomic_read(&conf->x))
+ seq_printf (seq, "<"#x":%d>", atomic_read(&conf->x))
printall(conf);
#endif
- return sz;
}
static void print_raid5_conf (raid5_conf_t *conf)
--- ../linux-2.4.18-26.7.x/fs/seq_file.c Mon Feb 24 09:15:39 2003
+++ ./fs/seq_file.c Tue Mar 11 12:23:48 2003
@@ -295,3 +295,47 @@
m->count = m->size;
return -1;
}
+
+static void *single_start(struct seq_file *p, loff_t *pos)
+{
+ return NULL + (*pos == 0);
+}
+
+static void *single_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ ++*pos;
+ return NULL;
+}
+
+static void single_stop(struct seq_file *p, void *v)
+{
+}
+
+int single_open(struct file *file, int (*show)(struct seq_file *, void*), void *data)
+{
+ struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL);
+ int res = -ENOMEM;
+
+ if (op) {
+ op->start = single_start;
+ op->next = single_next;
+ op->stop = single_stop;
+ op->show = show;
+ res = seq_open(file, op);
+ if (!res)
+ ((struct seq_file *)file->private_data)->private = data;
+ else
+ kfree(op);
+ }
+ return res;
+}
+
+int single_release(struct inode *inode, struct file *file)
+{
+ struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
+ int res = seq_release(inode, file);
+ kfree(op);
+ return res;
+}
+
+
--- ../linux-2.4.18-26.7.x/include/linux/seq_file.h Mon Feb 24 09:15:18 2003
+++ ./include/linux/seq_file.h Tue Mar 11 12:25:15 2003
@@ -52,5 +52,7 @@
int seq_printf(struct seq_file *, const char *, ...)
__attribute__ ((format (printf,2,3)));
+int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
+int single_release(struct inode *, struct file *);
#endif
#endif
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] 2.4.18-26.7.x /proc/mdstat seq_file
2003-03-11 20:56 [PATCH] 2.4.18-26.7.x /proc/mdstat seq_file Rechenberg, Andrew
@ 2003-03-13 1:03 ` Neil Brown
2003-03-13 1:53 ` Kevin P. Fleming
0 siblings, 1 reply; 3+ messages in thread
From: Neil Brown @ 2003-03-13 1:03 UTC (permalink / raw)
To: Rechenberg, Andrew; +Cc: linux-raid
On Tuesday March 11, ARechenberg@shermanfinancialgroup.com wrote:
>
> With Kevin's patch from last week and some help from Randy, I patched
> the md code in Red Hat 2.4.18-26.7.x to use the seq_file interface for
> mdstat. I've attached the patch. As with Kevin's patch, it touches
> almost everything in drivers/md, as well as adding the necessary methods
> to fs/seq_file.c and include/linux/seq_file.h.
While this patch quite possibly works, I think it misses the point of
seq_file.
seq_file allows you to return the content of a possibly-large file in
lots of little bits, so multiple 'read' calls on the file don't
iterate over the whole file every time.
I recently sent a patch to Linus for 2.5 that converts /proc/mdstat to
use seq_file using what I understand to be the intended approach, and
I have just posted a similar patch for 2.4 to Marcelo. I expect it
appeared on linux-raid before you saw this...
NeilBrown
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] 2.4.18-26.7.x /proc/mdstat seq_file
2003-03-13 1:03 ` Neil Brown
@ 2003-03-13 1:53 ` Kevin P. Fleming
0 siblings, 0 replies; 3+ messages in thread
From: Kevin P. Fleming @ 2003-03-13 1:53 UTC (permalink / raw)
To: linux-raid
Neil Brown wrote:
> On Tuesday March 11, ARechenberg@shermanfinancialgroup.com wrote:
>
>>With Kevin's patch from last week and some help from Randy, I patched
>>the md code in Red Hat 2.4.18-26.7.x to use the seq_file interface for
>>mdstat. I've attached the patch. As with Kevin's patch, it touches
>>almost everything in drivers/md, as well as adding the necessary methods
>>to fs/seq_file.c and include/linux/seq_file.h.
>
>
> While this patch quite possibly works, I think it misses the point of
> seq_file.
>
> seq_file allows you to return the content of a possibly-large file in
> lots of little bits, so multiple 'read' calls on the file don't
> iterate over the whole file every time.
>
> I recently sent a patch to Linus for 2.5 that converts /proc/mdstat to
> use seq_file using what I understand to be the intended approach, and
> I have just posted a similar patch for 2.4 to Marcelo. I expect it
> appeared on linux-raid before you saw this...
>
Yep, saw it, it's most definitely a better solution. My solution was
quick and dirty, only to get it to stop oopsing (and so I could actually
monitor my arrays).
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2003-03-13 1:53 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-03-11 20:56 [PATCH] 2.4.18-26.7.x /proc/mdstat seq_file Rechenberg, Andrew
2003-03-13 1:03 ` Neil Brown
2003-03-13 1:53 ` Kevin P. Fleming
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).