* PATCH: Making mb_history length a dynamic tunable
@ 2009-04-07 17:20 Curt Wohlgemuth
2009-04-18 7:51 ` Michael Rubin
2009-04-20 3:22 ` Andreas Dilger
0 siblings, 2 replies; 8+ messages in thread
From: Curt Wohlgemuth @ 2009-04-07 17:20 UTC (permalink / raw)
To: ext4 development
Hi:
Since we frequently run in memory-constrained systems with many partitions,
the ~68K for each partition for the mb_history buffer can be excessive. The
following creates a new proc file under /proc/fs/ext4/ to control the number
of entries at mount time.
If the notion of a history length tunable is okay, but the location should
be under /sys/fs/ext4/ instead of /proc/fs/ext4/, I can change this. The
leftover files under /proc/fs/ext4/<partition>/ are a bit confusing to me.
Thanks,
Curt
Signed-off-by: Curt Wohlgemuth <curtw@google.com>
---
--- ext4/fs/ext4/mballoc.c.orig 2009-04-07 09:13:12.000000000 -0700
+++ ext4/fs/ext4/mballoc.c 2009-04-07 10:08:05.000000000 -0700
@@ -334,6 +334,10 @@
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;
+
+#define DEFAULT_HIST_SIZE 1000
+static int ext4_mbhist_size = DEFAULT_HIST_SIZE;
+
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb,
void *bitmap,
@@ -2417,31 +2421,36 @@ static struct file_operations ext4_mb_se
static void ext4_mb_history_release(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int delete_mbhist = sbi->s_mb_history_max != 0;
if (sbi->s_proc != NULL) {
remove_proc_entry("mb_groups", sbi->s_proc);
- remove_proc_entry("mb_history", sbi->s_proc);
+ if (delete_mbhist)
+ remove_proc_entry("mb_history", sbi->s_proc);
}
- kfree(sbi->s_mb_history);
+ if (delete_mbhist)
+ kfree(sbi->s_mb_history);
}
static void ext4_mb_history_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int i;
+ int create_mbhist = ext4_mbhist_size != 0;
if (sbi->s_proc != NULL) {
- proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
- &ext4_mb_seq_history_fops, sb);
+ if (create_mbhist)
+ proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
+ &ext4_mb_seq_history_fops, sb);
proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
&ext4_mb_seq_groups_fops, sb);
}
- sbi->s_mb_history_max = 1000;
+ sbi->s_mb_history_max = ext4_mbhist_size;
sbi->s_mb_history_cur = 0;
spin_lock_init(&sbi->s_mb_history_lock);
i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
- sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
+ sbi->s_mb_history = create_mbhist ? kzalloc(i, GFP_KERNEL) : NULL;
/* if we can't allocate history, then we simple won't use it */
}
@@ -2894,6 +2903,49 @@ static void release_blocks_on_commit(jou
mb_debug("freed %u blocks in %u structures\n", count, count2);
}
+static ssize_t read_mb_hist_size(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ char buffer[20];
+ size_t len;
+
+ len = snprintf(buffer, sizeof(buffer), "%i\n", ext4_mbhist_size);
+
+ return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+static ssize_t write_mb_hist_size(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ /* This allows for 99999 entries, at 68 bytes each */
+#define MAX_BUFSIZE 6
+ char kbuf[MAX_BUFSIZE + 1];
+ int value;
+
+ if (count) {
+ if (count > MAX_BUFSIZE)
+ return -EINVAL;
+ if (copy_from_user(&kbuf, buf, count))
+ return -EFAULT;
+ kbuf[min(count, sizeof(kbuf))-1] = '\0';
+
+
+ value = simple_strtol(kbuf, NULL, 0);
+
+ if (value < 0)
+ return -EINVAL;
+
+ ext4_mbhist_size = value;
+ }
+ return count;
+#undef MAX_BUFSIZE
+}
+
+static struct file_operations ext4_mb_size_fops = {
+ .read = read_mb_hist_size,
+ .write = write_mb_hist_size,
+};
+
int __init init_ext4_mballoc(void)
{
ext4_pspace_cachep =
@@ -2912,6 +2964,9 @@ int __init init_ext4_mballoc(void)
return -ENOMEM;
}
+ proc_create("mb_hist_size", S_IRUGO | S_IWUGO, ext4_proc_root,
+ &ext4_mb_size_fops);
+
ext4_free_ext_cachep =
kmem_cache_create("ext4_free_block_extents",
sizeof(struct ext4_free_data),
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: PATCH: Making mb_history length a dynamic tunable 2009-04-07 17:20 PATCH: Making mb_history length a dynamic tunable Curt Wohlgemuth @ 2009-04-18 7:51 ` Michael Rubin 2009-04-18 12:53 ` Theodore Tso 2009-04-20 3:22 ` Andreas Dilger 1 sibling, 1 reply; 8+ messages in thread From: Michael Rubin @ 2009-04-18 7:51 UTC (permalink / raw) To: Curt Wohlgemuth; +Cc: ext4 development On Tue, Apr 7, 2009 at 10:20 AM, Curt Wohlgemuth <curtw@google.com> wrote: > Hi: > > Since we frequently run in memory-constrained systems with many partitions, > the ~68K for each partition for the mb_history buffer can be excessive. The > following creates a new proc file under /proc/fs/ext4/ to control the number > of entries at mount time. > > If the notion of a history length tunable is okay, but the location should > be under /sys/fs/ext4/ instead of /proc/fs/ext4/, I can change this. The > leftover files under /proc/fs/ext4/<partition>/ are a bit confusing to me. > Does the silence mean that there is no interest in this CL? We thought making this a run time tunable would be cool. mrubin -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Making mb_history length a dynamic tunable 2009-04-18 7:51 ` Michael Rubin @ 2009-04-18 12:53 ` Theodore Tso 0 siblings, 0 replies; 8+ messages in thread From: Theodore Tso @ 2009-04-18 12:53 UTC (permalink / raw) To: Michael Rubin; +Cc: Curt Wohlgemuth, ext4 development On Sat, Apr 18, 2009 at 12:51:43AM -0700, Michael Rubin wrote: > On Tue, Apr 7, 2009 at 10:20 AM, Curt Wohlgemuth <curtw@google.com> wrote: > > Hi: > > > > Since we frequently run in memory-constrained systems with many partitions, > > the ~68K for each partition for the mb_history buffer can be excessive. The > > following creates a new proc file under /proc/fs/ext4/ to control the number > > of entries at mount time. > > > > If the notion of a history length tunable is okay, but the location should > > be under /sys/fs/ext4/ instead of /proc/fs/ext4/, I can change this. The > > leftover files under /proc/fs/ext4/<partition>/ are a bit confusing to me. > > > > Does the silence mean that there is no interest in this CL? No, just that you hit the ext4 dev's during the Linux storage and filesystem workshop, and some of us are still trying to catch up from that. This does seem like a reasonable patch. Yes, it should be in /sys/fs/ext4 (and this should actually be easier to support than /proc/fs/ext4). I've left some stuff under /proc because /sys uses the paradigm of returning a single file per individual tunable, which works great for most things, but it's not so hot for things like /proc/fs/ext4/<dev>/mb_history. However, tunables should go under /sys/fs/ext4. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Making mb_history length a dynamic tunable 2009-04-07 17:20 PATCH: Making mb_history length a dynamic tunable Curt Wohlgemuth 2009-04-18 7:51 ` Michael Rubin @ 2009-04-20 3:22 ` Andreas Dilger 2009-04-20 18:53 ` Curt Wohlgemuth 1 sibling, 1 reply; 8+ messages in thread From: Andreas Dilger @ 2009-04-20 3:22 UTC (permalink / raw) To: Curt Wohlgemuth; +Cc: ext4 development On Apr 07, 2009 10:20 -0700, Curt Wohlgemuth wrote: > Since we frequently run in memory-constrained systems with many partitions, > the ~68K for each partition for the mb_history buffer can be excessive. The > following creates a new proc file under /proc/fs/ext4/ to control the number > of entries at mount time. Sorry for the delay, it's been a hectic 2 weeks. I'm totally OK with this patch. > If the notion of a history length tunable is okay, but the location should > be under /sys/fs/ext4/ instead of /proc/fs/ext4/, I can change this. The > leftover files under /proc/fs/ext4/<partition>/ are a bit confusing to me. Complex /proc files like this cannot be moved into /sys because it does not support the seqfile interface for having output span more than one page. > --- > --- ext4/fs/ext4/mballoc.c.orig 2009-04-07 09:13:12.000000000 -0700 > +++ ext4/fs/ext4/mballoc.c 2009-04-07 10:08:05.000000000 -0700 > @@ -334,6 +334,10 @@ > static struct kmem_cache *ext4_pspace_cachep; > static struct kmem_cache *ext4_ac_cachep; > static struct kmem_cache *ext4_free_ext_cachep; > + > +#define DEFAULT_HIST_SIZE 1000 > +static int ext4_mbhist_size = DEFAULT_HIST_SIZE; > + > static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, > ext4_group_t group); > static void ext4_mb_generate_from_freelist(struct super_block *sb, > void *bitmap, > @@ -2417,31 +2421,36 @@ static struct file_operations ext4_mb_se > static void ext4_mb_history_release(struct super_block *sb) > { > struct ext4_sb_info *sbi = EXT4_SB(sb); > + int delete_mbhist = sbi->s_mb_history_max != 0; > > if (sbi->s_proc != NULL) { > remove_proc_entry("mb_groups", sbi->s_proc); > - remove_proc_entry("mb_history", sbi->s_proc); > + if (delete_mbhist) > + remove_proc_entry("mb_history", sbi->s_proc); > } > - kfree(sbi->s_mb_history); > + if (delete_mbhist) > + kfree(sbi->s_mb_history); If sbi->s_mb_history == NULL (as opposed to garbage) it is OK to just call kfree() without the extra check. > static void ext4_mb_history_init(struct super_block *sb) > { > struct ext4_sb_info *sbi = EXT4_SB(sb); > int i; > + int create_mbhist = ext4_mbhist_size != 0; > > if (sbi->s_proc != NULL) { > - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, > - &ext4_mb_seq_history_fops, sb); > + if (create_mbhist) > + proc_create_data("mb_history", S_IRUGO, sbi->s_proc, > + &ext4_mb_seq_history_fops, sb); > proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, > &ext4_mb_seq_groups_fops, sb); > } > > - sbi->s_mb_history_max = 1000; > + sbi->s_mb_history_max = ext4_mbhist_size; > sbi->s_mb_history_cur = 0; > spin_lock_init(&sbi->s_mb_history_lock); > i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); > - sbi->s_mb_history = kzalloc(i, GFP_KERNEL); > + sbi->s_mb_history = create_mbhist ? kzalloc(i, GFP_KERNEL) : NULL; > /* if we can't allocate history, then we simple won't use it */ > } Since the s_mb_history buffers are allocated at mount time only, and later writing to mb_hist_size does not affect their size, it means that mb_hist_size must be set before the filesystems are mounted. However, that makes it impossible to tune the root filesystem history size, and at best it is difficult to tune the other filesystems because the value has to be set by an init script after root mounts but before other filesystems mount. Another possible option is to have a module parameter that can be set when the ext4 module is inserted, so that it is sure to be available for all filesystems, but this won't help if ext4 is built into the kernel. The final (though more complex) option is to add a per-fs tunable that allows changing the history size at runtime for each filesystem. > @@ -2894,6 +2903,49 @@ static void release_blocks_on_commit(jou > mb_debug("freed %u blocks in %u structures\n", count, count2); > } > > +static ssize_t read_mb_hist_size(struct file *file, char __user *buf, > + size_t count, loff_t *ppos) > +static ssize_t write_mb_hist_size(struct file *file, const char __user *buf, > + size_t count, loff_t *ppos) Please add an ext4_ prefix to these function names. > +{ > + /* This allows for 99999 entries, at 68 bytes each */ > +#define MAX_BUFSIZE 6 > + char kbuf[MAX_BUFSIZE + 1]; > + int value; > + > + if (count) { > + if (count > MAX_BUFSIZE) > + return -EINVAL; > + if (copy_from_user(&kbuf, buf, count)) > + return -EFAULT; > + kbuf[min(count, sizeof(kbuf))-1] = '\0'; This is could probably be avoided. Simply initializing "kbuf" at declaration time will ensure that there is a trailing NUL because at most MAX_BUFSIZE bytes are copied into it. char kbuf[MAX_BUFSIZE + 1] = ""; > + value = simple_strtol(kbuf, NULL, 0); > + > + if (value < 0) > + return -EINVAL; > + > + ext4_mbhist_size = value; > + } > + return count; > +#undef MAX_BUFSIZE > +} > + > +static struct file_operations ext4_mb_size_fops = { > + .read = read_mb_hist_size, > + .write = write_mb_hist_size, > +}; > + > int __init init_ext4_mballoc(void) > { > ext4_pspace_cachep = > @@ -2912,6 +2964,9 @@ int __init init_ext4_mballoc(void) > return -ENOMEM; > } > > + proc_create("mb_hist_size", S_IRUGO | S_IWUGO, ext4_proc_root, > + &ext4_mb_size_fops); > + > ext4_free_ext_cachep = > kmem_cache_create("ext4_free_block_extents", > sizeof(struct ext4_free_data), Cheers, Andreas -- Andreas Dilger Sr. Staff Engineer, Lustre Group Sun Microsystems of Canada, Inc. ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Making mb_history length a dynamic tunable 2009-04-20 3:22 ` Andreas Dilger @ 2009-04-20 18:53 ` Curt Wohlgemuth 2009-04-20 19:26 ` Theodore Tso 2009-05-02 0:31 ` [PATCH] ext4: Make the length of the mb_history file tunable Theodore Ts'o 0 siblings, 2 replies; 8+ messages in thread From: Curt Wohlgemuth @ 2009-04-20 18:53 UTC (permalink / raw) To: Andreas Dilger; +Cc: ext4 development Hi Andreas: > On Apr 07, 2009 10:20 -0700, Curt Wohlgemuth wrote: > > Since we frequently run in memory-constrained systems with many partitions, > > the ~68K for each partition for the mb_history buffer can be excessive. The > > following creates a new proc file under /proc/fs/ext4/ to control the number > > of entries at mount time. > > Sorry for the delay, it's been a hectic 2 weeks. I'm totally OK with this > patch. Thanks for the detailed reply. > > If the notion of a history length tunable is okay, but the location should > > be under /sys/fs/ext4/ instead of /proc/fs/ext4/, I can change this. The > > leftover files under /proc/fs/ext4/<partition>/ are a bit confusing to me. > > Complex /proc files like this cannot be moved into /sys because it > does not support the seqfile interface for having output span more > than one page. Got it. > > static void ext4_mb_generate_from_freelist(struct super_block *sb, > > void *bitmap, > > @@ -2417,31 +2421,36 @@ static struct file_operations ext4_mb_se > > static void ext4_mb_history_release(struct super_block *sb) > > { > > struct ext4_sb_info *sbi = EXT4_SB(sb); > > + int delete_mbhist = sbi->s_mb_history_max != 0; > > > > if (sbi->s_proc != NULL) { > > remove_proc_entry("mb_groups", sbi->s_proc); > > - remove_proc_entry("mb_history", sbi->s_proc); > > + if (delete_mbhist) > > + remove_proc_entry("mb_history", sbi->s_proc); > > } > > - kfree(sbi->s_mb_history); > > + if (delete_mbhist) > > + kfree(sbi->s_mb_history); > > If sbi->s_mb_history == NULL (as opposed to garbage) it is OK to > just call kfree() without the extra check. Done. > Since the s_mb_history buffers are allocated at mount time only, > and later writing to mb_hist_size does not affect their size, > it means that mb_hist_size must be set before the filesystems are > mounted. However, that makes it impossible to tune the root > filesystem history size, and at best it is difficult to tune the > other filesystems because the value has to be set by an init > script after root mounts but before other filesystems mount. > > Another possible option is to have a module parameter that can be > set when the ext4 module is inserted, so that it is sure to be > available for all filesystems, but this won't help if ext4 is > built into the kernel. Which is certainly our case. > The final (though more complex) option is to add a per-fs > tunable that allows changing the history size at runtime for > each filesystem. Right, I thought about this. But avoiding the race when the tunable is being changed at the same time that history is being updated just didn't seem worth it to me. I see the mb_history mostly as a debug issue; having to umount/remount a FS to change history doesn't seem too onerous -- with the exception, of course, of the root FS. Do you think this is too big a burden, and I should look at handling it while a FS is mounted? > > @@ -2894,6 +2903,49 @@ static void release_blocks_on_commit(jou > > mb_debug("freed %u blocks in %u structures\n", count, count2); > > } > > > > +static ssize_t read_mb_hist_size(struct file *file, char __user *buf, > > + size_t count, loff_t *ppos) > > +static ssize_t write_mb_hist_size(struct file *file, const char __user *buf, > > + size_t count, loff_t *ppos) > > Please add an ext4_ prefix to these function names. Done. > > +{ > > + /* This allows for 99999 entries, at 68 bytes each */ > > +#define MAX_BUFSIZE 6 > > + char kbuf[MAX_BUFSIZE + 1]; > > + int value; > > + > > + if (count) { > > + if (count > MAX_BUFSIZE) > > + return -EINVAL; > > + if (copy_from_user(&kbuf, buf, count)) > > + return -EFAULT; > > + kbuf[min(count, sizeof(kbuf))-1] = '\0'; > > This is could probably be avoided. Simply initializing "kbuf" at > declaration time will ensure that there is a trailing NUL because > at most MAX_BUFSIZE bytes are copied into it. > > char kbuf[MAX_BUFSIZE + 1] = ""; Ah, right, thanks; done. Second patch follows. Signed-off-by: Curt Wohlgemuth <curtw@google.com> --- --- ext4/fs/ext4/mballoc.c.orig 2009-04-07 09:13:12.000000000 -0700 +++ ext4/fs/ext4/mballoc.c 2009-04-20 11:22:25.000000000 -0700 @@ -334,6 +334,10 @@ static struct kmem_cache *ext4_pspace_cachep; static struct kmem_cache *ext4_ac_cachep; static struct kmem_cache *ext4_free_ext_cachep; + +#define DEFAULT_HIST_SIZE 1000 +static int ext4_mbhist_size = DEFAULT_HIST_SIZE; + static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, @@ -2417,10 +2421,12 @@ static void ext4_mb_history_release(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); + int delete_mbhist = sbi->s_mb_history_max != 0; if (sbi->s_proc != NULL) { remove_proc_entry("mb_groups", sbi->s_proc); - remove_proc_entry("mb_history", sbi->s_proc); + if (delete_mbhist) + remove_proc_entry("mb_history", sbi->s_proc); } kfree(sbi->s_mb_history); } @@ -2429,19 +2435,21 @@ { struct ext4_sb_info *sbi = EXT4_SB(sb); int i; + int create_mbhist = ext4_mbhist_size != 0; if (sbi->s_proc != NULL) { - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_history_fops, sb); + if (create_mbhist) + proc_create_data("mb_history", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_history_fops, sb); proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); } - sbi->s_mb_history_max = 1000; + sbi->s_mb_history_max = ext4_mbhist_size; sbi->s_mb_history_cur = 0; spin_lock_init(&sbi->s_mb_history_lock); i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); - sbi->s_mb_history = kzalloc(i, GFP_KERNEL); + sbi->s_mb_history = create_mbhist ? kzalloc(i, GFP_KERNEL) : NULL; /* if we can't allocate history, then we simple won't use it */ } @@ -2894,6 +2902,48 @@ mb_debug("freed %u blocks in %u structures\n", count, count2); } +static ssize_t ext4_read_mb_hist_size(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char buffer[20]; + size_t len; + + len = snprintf(buffer, sizeof(buffer), "%i\n", ext4_mbhist_size); + + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t ext4_write_mb_hist_size(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + /* This allows for 99999 entries, at 68 bytes each */ +#define MAX_BUFSIZE 6 + char kbuf[MAX_BUFSIZE + 1] = ""; + int value; + + if (count) { + if (count > MAX_BUFSIZE) + return -EINVAL; + if (copy_from_user(&kbuf, buf, count)) + return -EFAULT; + + value = simple_strtol(kbuf, NULL, 0); + + if (value < 0) + return -EINVAL; + + ext4_mbhist_size = value; + } + return count; +#undef MAX_BUFSIZE +} + +static struct file_operations ext4_mb_size_fops = { + .read = ext4_read_mb_hist_size, + .write = ext4_write_mb_hist_size, +}; + int __init init_ext4_mballoc(void) { ext4_pspace_cachep = @@ -2912,6 +2962,9 @@ return -ENOMEM; } + proc_create("mb_hist_size", S_IRUGO | S_IWUGO, ext4_proc_root, + &ext4_mb_size_fops); + ext4_free_ext_cachep = kmem_cache_create("ext4_free_block_extents", sizeof(struct ext4_free_data), ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: PATCH: Making mb_history length a dynamic tunable 2009-04-20 18:53 ` Curt Wohlgemuth @ 2009-04-20 19:26 ` Theodore Tso 2009-05-02 0:31 ` [PATCH] ext4: Make the length of the mb_history file tunable Theodore Ts'o 1 sibling, 0 replies; 8+ messages in thread From: Theodore Tso @ 2009-04-20 19:26 UTC (permalink / raw) To: Curt Wohlgemuth; +Cc: Andreas Dilger, ext4 development On Mon, Apr 20, 2009 at 11:53:12AM -0700, Curt Wohlgemuth wrote: > > > Since the s_mb_history buffers are allocated at mount time only, > > and later writing to mb_hist_size does not affect their size, > > it means that mb_hist_size must be set before the filesystems are > > mounted. However, that makes it impossible to tune the root > > filesystem history size, and at best it is difficult to tune the > > other filesystems because the value has to be set by an init > > script after root mounts but before other filesystems mount. If it's only going to be allocated at mount-time (as opposed doing it at run-time --- where for simplicities sake I would just simply flush the existing history when resizing the mb_history buffers), I would make this a mount option. This would allow it to be set for the root filesystem via a boot option (rootfsflags). > Right, I thought about this. But avoiding the race when the tunable is > being changed at the same time that history is being updated just didn't > seem worth it to me. Fixing this wouldn't be that hard; I would use read-copy-update (RCU) and just free the old history when changing the size of the mb_history. But setting it as a mount option would also be fine, I think. - Ted ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] ext4: Make the length of the mb_history file tunable 2009-04-20 18:53 ` Curt Wohlgemuth 2009-04-20 19:26 ` Theodore Tso @ 2009-05-02 0:31 ` Theodore Ts'o 2009-05-02 16:30 ` Curt Wohlgemuth 1 sibling, 1 reply; 8+ messages in thread From: Theodore Ts'o @ 2009-05-02 0:31 UTC (permalink / raw) To: Curt Wohlgemuth Cc: Michael Rubin, Ext4 Developers List, Curt Wohlgemuth, Theodore Ts'o From: Curt Wohlgemuth <curtw@google.com> In memory-constrained systems with many partitions, the ~68K for each partition for the mb_history buffer can be excessive. This patch adds a new mount option, mb_history_length, as well as a way of setting the default via a module parameter (or via a sysfs parameter in /sys/module/ext4/parameter/default_mb_history_length). If the mb_history_length is set to zero, the mb_history facility is disabled entirely. Signed-off-by: Curt Wohlgemuth <curtw@google.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- I completely revamped how how tunable is set. The combination of a mount option plus a default which can be tuned via a sysfs file or a module parameter is cleaner, and certainly requires less code. (No need to open-code new file in /proc, which should be avoided if at all possible.) Anyway, this is what I've dropped into the ext4 patch queue. fs/ext4/mballoc.c | 13 +++++++------ fs/ext4/super.c | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dbd47ea..df75855 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2413,7 +2413,8 @@ static void ext4_mb_history_release(struct super_block *sb) if (sbi->s_proc != NULL) { remove_proc_entry("mb_groups", sbi->s_proc); - remove_proc_entry("mb_history", sbi->s_proc); + if (sbi->s_mb_history_max) + remove_proc_entry("mb_history", sbi->s_proc); } kfree(sbi->s_mb_history); } @@ -2424,17 +2425,17 @@ static void ext4_mb_history_init(struct super_block *sb) int i; if (sbi->s_proc != NULL) { - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_history_fops, sb); + if (sbi->s_mb_history_max) + proc_create_data("mb_history", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_history_fops, sb); proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); } - sbi->s_mb_history_max = 1000; sbi->s_mb_history_cur = 0; spin_lock_init(&sbi->s_mb_history_lock); i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); - sbi->s_mb_history = kzalloc(i, GFP_KERNEL); + sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; /* if we can't allocate history, then we simple won't use it */ } @@ -2444,7 +2445,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_mb_history h; - if (unlikely(sbi->s_mb_history == NULL)) + if (sbi->s_mb_history == NULL) return; if (!(ac->ac_op & sbi->s_mb_history_filter)) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7903f20..39223a5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -47,6 +47,13 @@ #include "xattr.h" #include "acl.h" +static int default_mb_history_length = 1000; + +module_param_named(default_mb_history_length, default_mb_history_length, + int, 0644); +MODULE_PARM_DESC(default_mb_history_length, + "Default number of entries saved for mb_history"); + struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; @@ -1042,7 +1049,7 @@ enum { Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, + Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, @@ -1088,6 +1095,7 @@ static const match_table_t tokens = { {Opt_data_writeback, "data=writeback"}, {Opt_data_err_abort, "data_err=abort"}, {Opt_data_err_ignore, "data_err=ignore"}, + {Opt_mb_history_length, "mb_history_length=%u"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, @@ -1329,6 +1337,13 @@ static int parse_options(char *options, struct super_block *sb, case Opt_data_err_ignore: clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); break; + case Opt_mb_history_length: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_mb_history_max = option; + break; #ifdef CONFIG_QUOTA case Opt_usrjquota: qtype = USRQUOTA; @@ -2345,6 +2360,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; + sbi->s_mb_history_max = default_mb_history_length; set_opt(sbi->s_mount_opt, BARRIER); -- 1.6.0.4 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] ext4: Make the length of the mb_history file tunable 2009-05-02 0:31 ` [PATCH] ext4: Make the length of the mb_history file tunable Theodore Ts'o @ 2009-05-02 16:30 ` Curt Wohlgemuth 0 siblings, 0 replies; 8+ messages in thread From: Curt Wohlgemuth @ 2009-05-02 16:30 UTC (permalink / raw) To: Theodore Ts'o; +Cc: Michael Rubin, Ext4 Developers List Hi Ted: Thanks for sending this out. It was on my list to re-vamp the patch to use your suggestion of a mount option, but I didn't get a chance. I tested it on our 2.6.26+patches kernel, and it works fine. Curt On Fri, May 1, 2009 at 5:31 PM, Theodore Ts'o <tytso@mit.edu> wrote: > From: Curt Wohlgemuth <curtw@google.com> > > In memory-constrained systems with many partitions, the ~68K for each > partition for the mb_history buffer can be excessive. > > This patch adds a new mount option, mb_history_length, as well as a > way of setting the default via a module parameter (or via a sysfs > parameter in /sys/module/ext4/parameter/default_mb_history_length). > If the mb_history_length is set to zero, the mb_history facility is > disabled entirely. > > Signed-off-by: Curt Wohlgemuth <curtw@google.com> > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > --- > > I completely revamped how how tunable is set. The combination of a > mount option plus a default which can be tuned via a sysfs file or a > module parameter is cleaner, and certainly requires less code. (No need > to open-code new file in /proc, which should be avoided if at all > possible.) > > Anyway, this is what I've dropped into the ext4 patch queue. > > fs/ext4/mballoc.c | 13 +++++++------ > fs/ext4/super.c | 18 +++++++++++++++++- > 2 files changed, 24 insertions(+), 7 deletions(-) > > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index dbd47ea..df75855 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -2413,7 +2413,8 @@ static void ext4_mb_history_release(struct super_block *sb) > > if (sbi->s_proc != NULL) { > remove_proc_entry("mb_groups", sbi->s_proc); > - remove_proc_entry("mb_history", sbi->s_proc); > + if (sbi->s_mb_history_max) > + remove_proc_entry("mb_history", sbi->s_proc); > } > kfree(sbi->s_mb_history); > } > @@ -2424,17 +2425,17 @@ static void ext4_mb_history_init(struct super_block *sb) > int i; > > if (sbi->s_proc != NULL) { > - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, > - &ext4_mb_seq_history_fops, sb); > + if (sbi->s_mb_history_max) > + proc_create_data("mb_history", S_IRUGO, sbi->s_proc, > + &ext4_mb_seq_history_fops, sb); > proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, > &ext4_mb_seq_groups_fops, sb); > } > > - sbi->s_mb_history_max = 1000; > sbi->s_mb_history_cur = 0; > spin_lock_init(&sbi->s_mb_history_lock); > i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); > - sbi->s_mb_history = kzalloc(i, GFP_KERNEL); > + sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; > /* if we can't allocate history, then we simple won't use it */ > } > > @@ -2444,7 +2445,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) > struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); > struct ext4_mb_history h; > > - if (unlikely(sbi->s_mb_history == NULL)) > + if (sbi->s_mb_history == NULL) > return; > > if (!(ac->ac_op & sbi->s_mb_history_filter)) > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index 7903f20..39223a5 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -47,6 +47,13 @@ > #include "xattr.h" > #include "acl.h" > > +static int default_mb_history_length = 1000; > + > +module_param_named(default_mb_history_length, default_mb_history_length, > + int, 0644); > +MODULE_PARM_DESC(default_mb_history_length, > + "Default number of entries saved for mb_history"); > + > struct proc_dir_entry *ext4_proc_root; > static struct kset *ext4_kset; > > @@ -1042,7 +1049,7 @@ enum { > Opt_journal_update, Opt_journal_dev, > Opt_journal_checksum, Opt_journal_async_commit, > Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, > - Opt_data_err_abort, Opt_data_err_ignore, > + Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, > Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, > Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, > Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, > @@ -1088,6 +1095,7 @@ static const match_table_t tokens = { > {Opt_data_writeback, "data=writeback"}, > {Opt_data_err_abort, "data_err=abort"}, > {Opt_data_err_ignore, "data_err=ignore"}, > + {Opt_mb_history_length, "mb_history_length=%u"}, > {Opt_offusrjquota, "usrjquota="}, > {Opt_usrjquota, "usrjquota=%s"}, > {Opt_offgrpjquota, "grpjquota="}, > @@ -1329,6 +1337,13 @@ static int parse_options(char *options, struct super_block *sb, > case Opt_data_err_ignore: > clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); > break; > + case Opt_mb_history_length: > + if (match_int(&args[0], &option)) > + return 0; > + if (option < 0) > + return 0; > + sbi->s_mb_history_max = option; > + break; > #ifdef CONFIG_QUOTA > case Opt_usrjquota: > qtype = USRQUOTA; > @@ -2345,6 +2360,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) > sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; > sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; > sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; > + sbi->s_mb_history_max = default_mb_history_length; > > set_opt(sbi->s_mount_opt, BARRIER); > > -- > 1.6.0.4 > > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2009-05-02 16:30 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2009-04-07 17:20 PATCH: Making mb_history length a dynamic tunable Curt Wohlgemuth 2009-04-18 7:51 ` Michael Rubin 2009-04-18 12:53 ` Theodore Tso 2009-04-20 3:22 ` Andreas Dilger 2009-04-20 18:53 ` Curt Wohlgemuth 2009-04-20 19:26 ` Theodore Tso 2009-05-02 0:31 ` [PATCH] ext4: Make the length of the mb_history file tunable Theodore Ts'o 2009-05-02 16:30 ` Curt Wohlgemuth
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).