From mboxrd@z Thu Jan 1 00:00:00 1970 From: wcheng@sourceware.org Date: 28 Aug 2007 14:58:50 -0000 Subject: [Cluster-devel] cluster/gfs-kernel/src/gfs diaper.c gfs.h inco ... Message-ID: <20070828145850.27537.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL4 Changes by: wcheng at sourceware.org 2007-08-28 14:58:48 Modified files: gfs-kernel/src/gfs: diaper.c gfs.h incore.h ioctl.c ops_file.c super.h Log message: Bugzilla 230803: Allow GFS to take advantage of VFS's "bd_inode_backing_dev_info" setting that overrides diaper device's generic readahead value. Various benchmarks show we can achieve 2x more performance with large sequential write. Option is controlled by a new GFS tunable (seq_readahead). Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/diaper.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/gfs.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5&r2=1.5.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/incore.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.17.2.9&r2=1.17.2.10 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/ioctl.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.7&r2=1.7.2.8 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/ops_file.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.16.2.15&r2=1.16.2.16 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/super.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.1&r2=1.2.2.2 --- cluster/gfs-kernel/src/gfs/Attic/diaper.c 2005/05/05 23:19:39 1.1.2.3 +++ cluster/gfs-kernel/src/gfs/Attic/diaper.c 2007/08/28 14:58:48 1.1.2.4 @@ -371,6 +371,7 @@ diaper->bd_inode->i_data.backing_dev_info = &gd->queue->backing_dev_info; } else printk("GFS: diaper: 
reopening\n"); + diaper->bd_openers++; up(&diaper->bd_sem); --- cluster/gfs-kernel/src/gfs/gfs.h 2005/01/04 10:07:11 1.5 +++ cluster/gfs-kernel/src/gfs/gfs.h 2007/08/28 14:58:48 1.5.2.1 @@ -21,6 +21,7 @@ #include "fixed_div64.h" #include "lvb.h" +#include <linux/backing-dev.h> #include "incore.h" #include "util.h" --- cluster/gfs-kernel/src/gfs/incore.h 2007/03/13 21:21:21 1.17.2.9 +++ cluster/gfs-kernel/src/gfs/incore.h 2007/08/28 14:58:48 1.17.2.10 @@ -894,6 +894,7 @@ unsigned int gt_greedy_max; unsigned int gt_rgrp_try_threshold; unsigned int gt_statfs_fast; + unsigned int gt_seq_readahead; }; /* @@ -959,6 +960,10 @@ struct super_block *sd_vfs; /* Linux VFS device independent sb */ + /* readahead */ + struct backing_dev_info sd_dev_info; + spinlock_t sd_dev_info_spin; + struct gfs_args sd_args; /* Mount arguments */ unsigned long sd_flags; /* SDF_... see above */ --- cluster/gfs-kernel/src/gfs/ioctl.c 2007/03/13 21:21:21 1.7.2.7 +++ cluster/gfs-kernel/src/gfs/ioctl.c 2007/08/28 14:58:48 1.7.2.8 @@ -484,6 +484,7 @@ gfs_printf("greedy_max %u\n", gt->gt_greedy_max); gfs_printf("rgrp_try_threshold %u\n", gt->gt_rgrp_try_threshold); gfs_printf("statfs_fast %u\n", gt->gt_statfs_fast); + gfs_printf("seq_readahead %u\n", gt->gt_seq_readahead); error = 0; @@ -763,6 +764,14 @@ else tune_set(gt_statfs_fast, x); + } else if (strcmp(param, "seq_readahead") == 0) { + if (sscanf(value, "%u", &x) != 1) + return -EINVAL; + error = gfs_reset_readahead(sdp, x); + if (error) + return error; + tune_set(gt_seq_readahead, x); + } else return -EINVAL; --- cluster/gfs-kernel/src/gfs/ops_file.c 2006/11/17 20:33:15 1.16.2.15 +++ cluster/gfs-kernel/src/gfs/ops_file.c 2007/08/28 14:58:48 1.16.2.16 @@ -1743,6 +1743,57 @@ return do_flock(file, cmd, fl); } +/* + * VFS layer offers a "bd_inode_backing_dev_info" pointer to allow + * filesystem overriding its block device generic readahead value. + * We'll take this advantage to tune for large sequential write. 
+ */ +struct backing_dev_info gfs_file_backing_dev_info = { + .ra_pages = 0, /* place holder */ + .memory_backed = 0, /* place holder */ + .unplug_io_fn = default_unplug_io_fn, +}; + +int gfs_reset_readahead(struct gfs_sbd *sdp, unsigned int file_ra) +{ + unsigned int max_ra; + struct backing_dev_info *bdi; + struct super_block *vfs_sb=sdp->sd_vfs; + + /* user wants to reset back to default */ + if (file_ra == 0) { + vfs_sb->s_bdev->bd_inode_backing_dev_info = NULL; + return 0; + } + + /* can't exceed gfs max readahead */ + max_ra = gfs_tune_get(sdp, gt_max_readahead); + if (file_ra > max_ra) { + printk("GFS: einval - readahead exceeds max\n"); + return -EINVAL; + } + + bdi = sdp->sd_vfs->s_bdev->bd_inode_backing_dev_info; + + /* sanity check */ + if (gfs_tune_get(sdp, gt_seq_readahead) && !bdi) { + printk("GFS: error - gt_seq_readahead set but bdi is NULL\n"); + return -EIO; + } + + /* add pointer */ + if (!bdi) { + bdi = &sdp->sd_dev_info; + vfs_sb->s_bdev->bd_inode_backing_dev_info = bdi; + *bdi = *(vfs_sb->s_bdev->bd_inode->i_data.backing_dev_info); + } + + /* now change the setting */ + bdi->ra_pages = file_ra; + + return 0; +} + struct file_operations gfs_file_fops = { .llseek = gfs_llseek, .read = gfs_read, --- cluster/gfs-kernel/src/gfs/super.h 2007/03/13 21:21:21 1.2.2.1 +++ cluster/gfs-kernel/src/gfs/super.h 2007/08/28 14:58:48 1.2.2.2 @@ -63,4 +63,6 @@ int gfs_freeze_fs(struct gfs_sbd *sdp); void gfs_unfreeze_fs(struct gfs_sbd *sdp); +int gfs_reset_readahead(struct gfs_sbd *sdp, unsigned int file_ra); + #endif /* __SUPER_DOT_H__ */