From mboxrd@z Thu Jan 1 00:00:00 1970 From: wcheng@sourceware.org Date: 13 Mar 2007 21:21:22 -0000 Subject: [Cluster-devel] cluster/gfs-kernel/src/gfs daemon.c gfs_ondisk ... Message-ID: <20070313212122.4022.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL4 Changes by: wcheng at sourceware.org 2007-03-13 21:21:21 Modified files: gfs-kernel/src/gfs: daemon.c gfs_ondisk.h incore.h ioctl.c ops_fstype.c ops_super.c rgrp.c rgrp.h super.c super.h Log message: Bugzilla 220622: backport GFS2 statfs() implementation to GFS1. A few compromises were made while porting the GFS2 approach over, mostly to avoid on-disk structure changes. GFS2 allocates (number-of-nodes + 1) physical files on disk at mkfs time, but GFS1 has only one extra space (the unused license file) for this purpose. We deviate from GFS2 by writing the local per-node changes into a memory buffer. Doing a "df" on a quiet filesystem now gives the following results: dhcp145 (1 cpu HP): old df took 0.875 seconds, new df took 0.008 seconds; dhcp146 (4 cpus DELL): old df took 0.808 seconds, new df took 0.006 seconds. 
Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/daemon.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5&r2=1.5.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/gfs_ondisk.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.1&r2=1.7.2.2 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/incore.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.17.2.8&r2=1.17.2.9 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/ioctl.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.6&r2=1.7.2.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/ops_fstype.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.13.2.3&r2=1.13.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/ops_super.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.12.2.5&r2=1.12.2.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/rgrp.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.14.2.3&r2=1.14.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/rgrp.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3&r2=1.3.2.1 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/super.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.12.2.7&r2=1.12.2.8 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/super.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2&r2=1.2.2.1 --- cluster/gfs-kernel/src/gfs/daemon.c 2005/01/04 10:07:11 1.5 +++ cluster/gfs-kernel/src/gfs/daemon.c 2007/03/13 21:21:21 1.5.2.1 @@ -207,6 +207,19 @@ complete(&sdp->sd_thread_completion); for (;;) { + /* Update statfs file */ + if (gfs_tune_get(sdp, gt_statfs_fast) && + time_after_eq(jiffies, + sdp->sd_statfs_sync_time + + gfs_tune_get(sdp, gt_statfs_fast) * HZ)) { + error = gfs_statfs_sync(sdp); + if (error && + error != -EROFS && + !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + printk("GFS: fsid=%s: statfs: error = %d\n", + sdp->sd_fsname, error); + sdp->sd_statfs_sync_time = 
jiffies; + } /* Update quota file */ if (time_after_eq(jiffies, sdp->sd_quota_sync_time + --- cluster/gfs-kernel/src/gfs/gfs_ondisk.h 2006/11/06 21:01:36 1.7.2.1 +++ cluster/gfs-kernel/src/gfs/gfs_ondisk.h 2007/03/13 21:21:21 1.7.2.2 @@ -616,6 +616,31 @@ uint32_t ea_pad; }; +/* + * Statfs change + * Describes an change to the pool of free and allocated + * blocks. + */ + +struct gfs_statfs_change { + __be64 sc_total; + __be64 sc_free; + __be64 sc_dinodes; +}; + +/* +struct gfs_statfs_change_host { + __u64 sc_total; + __u64 sc_free; + __u64 sc_dinodes; +}; +*/ +struct gfs_statfs_change_host { + int64_t sc_total; + int64_t sc_free; + int64_t sc_dinodes; +}; + /* Endian functions */ #define GFS_ENDIAN_BIG --- cluster/gfs-kernel/src/gfs/incore.h 2007/01/22 07:43:52 1.17.2.8 +++ cluster/gfs-kernel/src/gfs/incore.h 2007/03/13 21:21:21 1.17.2.9 @@ -893,6 +893,7 @@ unsigned int gt_greedy_quantum; unsigned int gt_greedy_max; unsigned int gt_rgrp_try_threshold; + unsigned int gt_statfs_fast; }; /* @@ -963,6 +964,13 @@ struct gfs_tune sd_tune; /* Filesystem tuning structure */ + /* statfs */ + struct inode *sd_statfs_inode; + spinlock_t sd_statfs_spin; + struct gfs_statfs_change_host sd_statfs_master; + struct gfs_statfs_change_host sd_statfs_local; + unsigned long sd_statfs_sync_time; + /* Resource group stuff */ struct gfs_inode *sd_riinode; /* Resource Index (rindex) inode */ --- cluster/gfs-kernel/src/gfs/ioctl.c 2007/01/22 07:43:52 1.7.2.6 +++ cluster/gfs-kernel/src/gfs/ioctl.c 2007/03/13 21:21:21 1.7.2.7 @@ -483,6 +483,7 @@ gfs_printf("greedy_quantum %u\n", gt->gt_greedy_quantum); gfs_printf("greedy_max %u\n", gt->gt_greedy_max); gfs_printf("rgrp_try_threshold %u\n", gt->gt_rgrp_try_threshold); + gfs_printf("statfs_fast %u\n", gt->gt_statfs_fast); error = 0; @@ -513,6 +514,7 @@ struct gfs_tune *gt = &sdp->sd_tune; char param[ARG_SIZE], value[ARG_SIZE]; unsigned int x; + int error; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -752,6 +754,15 @@ return -EINVAL; 
tune_set(gt_rgrp_try_threshold, x); + } else if (strcmp(param, "statfs_fast") == 0) { + if (sscanf(value, "%u", &x) != 1) + return -EINVAL; + error = gfs_statfs_init(sdp, x); + if (error) + return error; + else + tune_set(gt_statfs_fast, x); + } else return -EINVAL; @@ -1496,5 +1507,3 @@ return error; } - - --- cluster/gfs-kernel/src/gfs/ops_fstype.c 2006/09/15 21:42:05 1.13.2.3 +++ cluster/gfs-kernel/src/gfs/ops_fstype.c 2007/03/13 21:21:21 1.13.2.4 @@ -100,6 +100,8 @@ spin_lock_init(&sdp->sd_rg_recent_lock); spin_lock_init(&sdp->sd_rg_forward_lock); + spin_lock_init(&sdp->sd_statfs_spin); + for (x = 0; x < GFS_GL_HASH_SIZE; x++) { sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED; INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list); @@ -472,9 +474,16 @@ /* Read in the license inode */ + /* Piggy back fast df on this inode location + * sb->sb_quota_di.no_formal_ino = jindex_dinode + 2; + * sb->sb_quota_di.no_addr = jindex_dinode + 2; + * sb->sb_license_di.no_formal_ino = jindex_dinode + 3; + * sb->sb_license_di.no_addr = jindex_dinode + 3; + */ + error = gfs_get_linode(sdp); if (error) { - printk("GFS: fsid=%s: can't get license file inode: %d\n", + printk("GFS: fsid=%s: can't get license/statfs file inode: %d\n", sdp->sd_fsname, error); goto fail_qi_free; } --- cluster/gfs-kernel/src/gfs/ops_super.c 2006/09/15 21:42:05 1.12.2.5 +++ cluster/gfs-kernel/src/gfs/ops_super.c 2007/03/13 21:21:21 1.12.2.6 @@ -283,6 +283,9 @@ atomic_inc(&sdp->sd_ops_super); + if (gfs_tune_get(sdp, gt_statfs_fast)) + return(gfs_statfs_fast(sdp, (void *)buf)); + error = gfs_stat_gfs(sdp, &sg, TRUE); if (error) return error; --- cluster/gfs-kernel/src/gfs/rgrp.c 2005/10/24 15:53:03 1.14.2.3 +++ cluster/gfs-kernel/src/gfs/rgrp.c 2007/03/13 21:21:21 1.14.2.4 @@ -18,6 +18,7 @@ #include #include #include +#include #include "gfs.h" #include "bits.h" @@ -1657,6 +1658,9 @@ al->al_alloced_data++; gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid); + + /* total=0, free=-1, dinodes=0 */ + 
gfs_statfs_modify(sdp, 0, -1, 0); } /** @@ -1711,6 +1715,9 @@ gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid); + /* total=0, free=-1, dinode=0 */ + gfs_statfs_modify(sdp, 0, -1, 0); + return 0; } @@ -1726,6 +1733,7 @@ int gfs_dialloc(struct gfs_inode *dip, uint64_t *block) { + struct gfs_sbd *sdp = dip->i_sbd; struct gfs_alloc *al = dip->i_alloc; struct gfs_rgrpd *rgd = al->al_rgd; uint32_t goal, blk; @@ -1765,6 +1773,9 @@ al->al_alloced_di++; al->al_alloced_meta++; + /* total=0, free=-1, dinodes=1 */ + gfs_statfs_modify(sdp, 0, -1, +1); + return error; } @@ -1797,6 +1808,8 @@ gfs_trans_add_quota(sdp, -(int64_t)blen, ip->i_di.di_uid, ip->i_di.di_gid); + /* total=0, free=+blen, dinodes=0 */ + gfs_statfs_modify(sdp, 0, blen, 0); } /** @@ -1831,6 +1844,9 @@ gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]); gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data); + /* total=0, free=blen, dinode=0 */ + gfs_statfs_modify(sdp, 0, blen, 0); + gfs_trans_add_quota(sdp, -(int64_t)blen, ip->i_di.di_uid, ip->i_di.di_gid); @@ -1865,6 +1881,9 @@ gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]); gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data); + + /* total=0, free=1, dinodes=-1 */ + gfs_statfs_modify(rgd->rd_sbd, 0, +1, -1); } /** --- cluster/gfs-kernel/src/gfs/rgrp.h 2005/01/04 10:07:12 1.3 +++ cluster/gfs-kernel/src/gfs/rgrp.h 2007/03/13 21:21:21 1.3.2.1 @@ -57,6 +57,10 @@ void gfs_difree_uninit(struct gfs_rgrpd *rgd, uint64_t addr); void gfs_difree(struct gfs_rgrpd *rgd, struct gfs_inode *ip); +extern void gfs_statfs_modify(struct gfs_sbd *sdp, + int64_t total, + int64_t free, + int64_t dinodes); /* * gfs_rgrp_list * --- cluster/gfs-kernel/src/gfs/super.c 2007/02/06 22:08:36 1.12.2.7 +++ cluster/gfs-kernel/src/gfs/super.c 2007/03/13 21:21:21 1.12.2.8 @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -33,6 +35,7 @@ #include "rgrp.h" #include "super.h" #include "unlinked.h" +#include "trans.h" /** * gfs_tune_init - Fill a gfs_tune structure with 
default values @@ -85,6 +88,7 @@ gt->gt_greedy_quantum = HZ / 40; gt->gt_greedy_max = HZ / 4; gt->gt_rgrp_try_threshold = 100; + gt->gt_statfs_fast = 0; } /** @@ -682,6 +686,7 @@ if (error) return error; + /* iopen obtained in via gfs_glock_get(..gfs_iopen_glops) */ error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_license_di, CREATE, &sdp->sd_linode); @@ -770,6 +775,7 @@ !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; + gfs_statfs_sync(sdp); gfs_log_flush(sdp); gfs_quota_sync(sdp); gfs_quota_scan(sdp); @@ -1083,3 +1089,199 @@ up(&sdp->sd_freeze_lock); } + +/* + * 03/02/07 wcheng at redhat.com + * Fast statfs implementation - mostly based on GFS2 implementation. + */ + +void gfs_statfs_change_in(struct gfs_statfs_change_host *sc, const void *buf) +{ + const struct gfs_statfs_change *str = buf; + + sc->sc_total = be64_to_cpu(str->sc_total); + sc->sc_free = be64_to_cpu(str->sc_free); + sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); +} + +void gfs_statfs_change_out(const struct gfs_statfs_change_host *sc, void *buf) +{ + struct gfs_statfs_change *str = buf; + + str->sc_total = cpu_to_be64(sc->sc_total); + str->sc_free = cpu_to_be64(sc->sc_free); + str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); +} + +int gfs_statfs_init(struct gfs_sbd *sdp, int flag) +{ + int error; + + /* if flag == 0, do we want to turn this off ? */ + if (!flag) + return 0; + + error = gfs_statfs_start(sdp); + if (error) + printk("GFS: fsid=%s: can't initialize statfs subsystem: %d\n", + sdp->sd_fsname, error); + + return error; +} +int gfs_statfs_start(struct gfs_sbd *sdp) +{ + struct gfs_stat_gfs sg; + struct gfs_inode *m_ip; + struct gfs_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs_statfs_change_host *l_sc = &sdp->sd_statfs_local; + struct buffer_head *m_bh; + struct gfs_holder gh; + int error; + + printk("GFS: fsid=%s: fast statfs start time = %lu\n", + sdp->sd_fsname, get_seconds()); + + /* created via gfs_get_linode() in fill_super(). 
*/ + /* gfs_inode_glops */ + m_ip = sdp->sd_linode; + + /* get real statistics */ + error = gfs_stat_gfs(sdp, &sg, TRUE); + if (error) + return error; + + /* make sure the page is refreshed via glock flushing */ + error = gfs_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, + &gh); + if (error) + goto gfs_statfs_start_out; + + error = gfs_get_inode_buffer(m_ip, &m_bh); + if (error) + goto gfs_statfs_start_unlock; + + error = gfs_trans_begin(sdp, 1, 0); + if (error) + goto gfs_statfs_start_bh; + + spin_lock(&sdp->sd_statfs_spin); + m_sc->sc_total = sg.sg_total_blocks; + m_sc->sc_free = sg.sg_free + sg.sg_free_dinode + sg.sg_free_meta; + m_sc->sc_dinodes = sg.sg_used_dinode; + memset(l_sc, 0, sizeof(struct gfs_statfs_change_host)); + spin_unlock(&sdp->sd_statfs_spin); + + gfs_trans_add_bh(m_ip->i_gl, m_bh); + gfs_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs_dinode)); + + gfs_trans_end(sdp); + +gfs_statfs_start_bh: + brelse(m_bh); + +gfs_statfs_start_unlock: + gfs_glock_dq_uninit(&gh); + +gfs_statfs_start_out: + return 0; +} + +void gfs_statfs_modify(struct gfs_sbd *sdp, + int64_t total, + int64_t free, + int64_t dinodes) +{ + struct gfs_statfs_change_host *l_sc = &sdp->sd_statfs_local; + + spin_lock(&sdp->sd_statfs_spin); + l_sc->sc_total += total; + l_sc->sc_free += free; + l_sc->sc_dinodes += dinodes; + spin_unlock(&sdp->sd_statfs_spin); +} + +int gfs_statfs_sync(struct gfs_sbd *sdp) +{ + struct gfs_inode *m_ip = sdp->sd_linode; + struct gfs_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs_statfs_change_host *l_sc = &sdp->sd_statfs_local; + struct gfs_holder gh; + struct buffer_head *m_bh; + int error; + + error = gfs_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, + &gh); + if (error) + return error; + + error = gfs_get_inode_buffer(m_ip, &m_bh); + if (error) + goto gfs_statfs_sync_out; + + /* if no change, simply return */ + spin_lock(&sdp->sd_statfs_spin); + gfs_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct 
gfs_dinode)); + if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) { + spin_unlock(&sdp->sd_statfs_spin); + goto out_bh; + } + spin_unlock(&sdp->sd_statfs_spin); + + error = gfs_trans_begin(sdp, 1, 0); + if (error) + goto out_bh; + + spin_lock(&sdp->sd_statfs_spin); + m_sc->sc_total += l_sc->sc_total; + m_sc->sc_free += l_sc->sc_free; + m_sc->sc_dinodes += l_sc->sc_dinodes; + memset(l_sc, 0, sizeof(struct gfs_statfs_change_host)); + spin_unlock(&sdp->sd_statfs_spin); + + gfs_trans_add_bh(m_ip->i_gl, m_bh); + gfs_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs_dinode)); + + gfs_trans_end(sdp); + +out_bh: + brelse(m_bh); + +gfs_statfs_sync_out: + gfs_glock_dq_uninit(&gh); + return error; +} + +int gfs_statfs_fast(struct gfs_sbd *sdp, void *b) +{ + struct kstatfs *buf = (struct kstatfs *)b; + struct gfs_statfs_change_host sc, *m_sc = &sdp->sd_statfs_master; + struct gfs_statfs_change_host *l_sc = &sdp->sd_statfs_local; + + spin_lock(&sdp->sd_statfs_spin); + + sc.sc_total = m_sc->sc_total + l_sc->sc_total; + sc.sc_free = m_sc->sc_free + l_sc->sc_free; + sc.sc_dinodes = m_sc->sc_dinodes + l_sc->sc_dinodes; + spin_unlock(&sdp->sd_statfs_spin); + + if (sc.sc_free < 0) + sc.sc_free = 0; + if (sc.sc_free > sc.sc_total) + sc.sc_free = sc.sc_total; + if (sc.sc_dinodes < 0) + sc.sc_dinodes = 0; + + /* fill in the statistics */ + memset(buf, 0, sizeof(struct kstatfs)); + + buf->f_type = GFS_MAGIC; buf->f_bsize = sdp->sd_sb.sb_bsize; + buf->f_blocks = sc.sc_total; + buf->f_bfree = sc.sc_free; + buf->f_bavail = sc.sc_free; + buf->f_files = sc.sc_dinodes + sc.sc_free; + buf->f_ffree = sc.sc_free; + buf->f_namelen = GFS_FNAMESIZE; + + return 0; +} --- cluster/gfs-kernel/src/gfs/super.h 2005/01/04 10:07:12 1.2 +++ cluster/gfs-kernel/src/gfs/super.h 2007/03/13 21:21:21 1.2.2.1 @@ -42,6 +42,10 @@ int gfs_make_fs_rw(struct gfs_sbd *sdp); int gfs_make_fs_ro(struct gfs_sbd *sdp); +int gfs_statfs_init(struct gfs_sbd *sdp, int flag); +int gfs_statfs_sync(struct gfs_sbd 
*sdp); +int gfs_statfs_fast(struct gfs_sbd *sdp, void *buf); + struct gfs_stat_gfs { uint64_t sg_total_blocks; uint64_t sg_free;