* [Cluster-devel] [GFS2 Patch][TRY 2] GFS2: Extend the life of the reservations structure [not found] <873bd4a2-fac6-4347-b487-832e2f969344@zmail12.collab.prod.int.phx2.redhat.com> @ 2012-05-09 16:22 ` Bob Peterson 2012-05-11 9:36 ` Steven Whitehouse 0 siblings, 1 reply; 7+ messages in thread From: Bob Peterson @ 2012-05-09 16:22 UTC (permalink / raw) To: cluster-devel.redhat.com Hi, This is a second attempt at this patch. Explanation is below. The idea here is to extend the lifespan of the block reservations structure. This has three benefits: First, for performance, GFS2 spends less time allocating and freeing memory for this purpose and can re-use the structure already attached to the inode. Second, it allows us to re-combine the quota structure in a to save the time we spend allocating and freeing that, in a future patch. Third, it sets us up for the forthcoming multi-block reservations patch. There was an issue with the predecessor patch not doing any locking during the allocating and freeing of the reservations for an inode. This patch re-purposes the gfs2 i_rw_mutex (to save memory) to protect the allocations and deletes. I couldn't see any places where this could conflict or deadlock with existing uses of the mutex. Regards, Bob Peterson Red Hat File Systems Signed-off-by: Bob Peterson <rpeterso@redhat.com> --- Author: Bob Peterson <rpeterso@redhat.com> Date: Thu Apr 12 11:37:24 2012 -0500 GFS2: Extend the life of the reservations structure This patch lengthens the lifespan of the reservations structure for inodes. Before, they were allocated and deallocated for every write operation. With this patch, they are allocated when the first write occurs, and deallocated when the last process closes the file. It's more efficient to do it this way because it saves GFS2 a lot of unnecessary allocates and frees. It also gives us more flexibility for the future: (1) we can now fold the qadata structure back into the structure and save those alloc/frees, (2) we can use this for multi-block reservations. diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e80a464..aba77b5 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (ip->i_res) + if (gfs2_mb_reserved(ip)) gfs2_inplace_release(ip); if (qa) { gfs2_quota_unlock(ip); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 31b199f..3790617 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_file *fp; + struct gfs2_inode *ip = GFS2_I(inode); fp = file->private_data; file->private_data = NULL; + if ((file->f_mode & FMODE_WRITE) && ip->i_res && + (atomic_read(&inode->i_writecount) == 1)) + gfs2_rs_delete(ip); + if (gfs2_assert_warn(sdp, fp)) return -EIO; @@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct dentry *dentry = file->f_dentry; + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + int ret; + + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; if (file->f_flags & O_APPEND) { - struct dentry *dentry = file->f_dentry; - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_holder gh; - int ret; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) @@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a9ba244..6a46417 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; + error = gfs2_rs_alloc(dip); + if (error) + return error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; @@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + /* the new inode needs a reservation so it can allocate xattrs. */ + error = gfs2_rs_alloc(GFS2_I(inode)); + if (error) + goto fail_gunlock2; + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_trans_end(sdp); /* Check if we reserved space in the rgrp. Function link_dinode may not, depending on whether alloc is required. */ - if (dip->i_res) + if (gfs2_mb_reserved(dip)) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_qadata_put(dip); @@ -740,6 +749,7 @@ fail_gunlock: iput(inode); } fail: + gfs2_rs_delete(dip); if (bh) brelse(bh); return error; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9b..e944fef 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ + int error = 0; + + down_write(&ip->i_rw_mutex); + if (!ip->i_res) { + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) + error = -ENOMEM; + } + up_write(&ip->i_rw_mutex); + return error; +} + +/** + * gfs2_rs_delete - delete a reservation + * @ip: The inode for this reservation + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_res) { + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + up_write(&ip->i_rw_mutex); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -993,22 +1026,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) } /** - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode - * @ip: the incore GFS2 inode structure - * - * Returns: the struct gfs2_qadata - */ - -static int gfs2_blkrsv_get(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res != NULL); - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - return -ENOMEM; - return 0; -} - -/** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data * @ip: the inode @@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) return -ENOSPC; } -static void gfs2_blkrsv_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res == NULL); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; -} - /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_blkreserv *rs; - int error; + int error = 0; u64 last_unlinked = NO_BLOCK; int tries = 0; - error = gfs2_blkrsv_get(ip); - if (error) - return error; - rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { @@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) out: if (error) - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; return error; } @@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; } /** @@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (ip->i_res == NULL) + if (ip->i_res->rs_requested == 0) return -ECANCELED; rgd = ip->i_rgd; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4b10f4..d9eda5f 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -43,6 +43,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); +extern int gfs2_rs_alloc(struct gfs2_inode *ip); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); @@ -68,4 +70,12 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); +/* This is how to tell if a reservation is "inplace" reserved: */ +static inline int gfs2_mb_reserved(struct gfs2_inode *ip) +{ + if (ip->i_res && ip->i_res->rs_requested) + return 1; + return 0; +} + #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6172fa7..a42df66 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1554,6 +1554,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); + gfs2_rs_delete(ip); end_writeback(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; @@ -1576,6 +1577,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip->i_flags = 0; ip->i_gl = NULL; ip->i_rgd = NULL; + ip->i_res = NULL; } return &ip->i_inode; } diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 125d457..41f42cd 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -31,7 +31,7 @@ struct gfs2_glock; static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) { const struct gfs2_blkreserv *rs = ip->i_res; - if (rs->rs_requested < ip->i_rgd->rd_length) + if (rs && rs->rs_requested < ip->i_rgd->rd_length) return rs->rs_requested + 1; return ip->i_rgd->rd_length; } ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 Patch][TRY 2] GFS2: Extend the life of the reservations structure 2012-05-09 16:22 ` [Cluster-devel] [GFS2 Patch][TRY 2] GFS2: Extend the life of the reservations structure Bob Peterson @ 2012-05-11 9:36 ` Steven Whitehouse 2012-05-14 12:58 ` Bob Peterson 0 siblings, 1 reply; 7+ messages in thread From: Steven Whitehouse @ 2012-05-11 9:36 UTC (permalink / raw) To: cluster-devel.redhat.com Hi, On Wed, 2012-05-09 at 12:22 -0400, Bob Peterson wrote: > Hi, > > This is a second attempt at this patch. Explanation is below. > > The idea here is to extend the lifespan of the block reservations > structure. This has three benefits: First, for performance, GFS2 > spends less time allocating and freeing memory for this purpose > and can re-use the structure already attached to the inode. > Second, it allows us to re-combine the quota structure in a > to save the time we spend allocating and freeing that, in a future > patch. Third, it sets us up for the forthcoming multi-block > reservations patch. > > There was an issue with the predecessor patch not doing any locking > during the allocating and freeing of the reservations for an inode. > This patch re-purposes the gfs2 i_rw_mutex (to save memory) to > protect the allocations and deletes. I couldn't see any places where > this could conflict or deadlock with existing uses of the mutex. > > Regards, > > Bob Peterson > Red Hat File Systems > > Signed-off-by: Bob Peterson <rpeterso@redhat.com> > --- > Author: Bob Peterson <rpeterso@redhat.com> > Date: Thu Apr 12 11:37:24 2012 -0500 > > GFS2: Extend the life of the reservations structure > > This patch lengthens the lifespan of the reservations structure for > inodes. Before, they were allocated and deallocated for every write > operation. With this patch, they are allocated when the first write > occurs, and deallocated when the last process closes the file. > It's more efficient to do it this way because it saves GFS2 a lot of > unnecessary allocates and frees. It also gives us more flexibility > for the future: (1) we can now fold the qadata structure back into > the structure and save those alloc/frees, (2) we can use this for > multi-block reservations. > > diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c > index e80a464..aba77b5 100644 > --- a/fs/gfs2/aops.c > +++ b/fs/gfs2/aops.c > @@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, > brelse(dibh); > failed: > gfs2_trans_end(sdp); > - if (ip->i_res) > + if (gfs2_mb_reserved(ip)) > gfs2_inplace_release(ip); > if (qa) { > gfs2_quota_unlock(ip); > diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c > index 31b199f..3790617 100644 > --- a/fs/gfs2/file.c > +++ b/fs/gfs2/file.c > @@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) > */ > vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); > > + ret = gfs2_rs_alloc(ip); > + if (ret) > + return ret; > + > gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); > ret = gfs2_glock_nq(&gh); > if (ret) > @@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file) > { > struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; > struct gfs2_file *fp; > + struct gfs2_inode *ip = GFS2_I(inode); > > fp = file->private_data; > file->private_data = NULL; > > + if ((file->f_mode & FMODE_WRITE) && ip->i_res && > + (atomic_read(&inode->i_writecount) == 1)) > + gfs2_rs_delete(ip); > + > if (gfs2_assert_warn(sdp, fp)) > return -EIO; > > @@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, > unsigned long nr_segs, loff_t pos) > { > struct file *file = iocb->ki_filp; > + struct dentry *dentry = file->f_dentry; > + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); > + int ret; > + > + ret = gfs2_rs_alloc(ip); > + if (ret) > + return ret; > > if (file->f_flags & O_APPEND) { > - struct dentry *dentry = file->f_dentry; > - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); > struct gfs2_holder gh; > - int ret; > > ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); > if (ret) > @@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, > if (bytes == 0) > bytes = sdp->sd_sb.sb_bsize; > > + error = gfs2_rs_alloc(ip); > + if (error) > + return error; > + > gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); > error = gfs2_glock_nq(&ip->i_gh); > if (unlikely(error)) > diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c > index a9ba244..6a46417 100644 > --- a/fs/gfs2/inode.c > +++ b/fs/gfs2/inode.c > @@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, > if (!name->len || name->len > GFS2_FNAMESIZE) > return -ENAMETOOLONG; > > + error = gfs2_rs_alloc(dip); > + if (error) > + return error; > + > error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); > if (error) > goto fail; > @@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, > if (error) > goto fail_gunlock2; > > + /* the new inode needs a reservation so it can allocate xattrs. */ > + error = gfs2_rs_alloc(GFS2_I(inode)); > + if (error) > + goto fail_gunlock2; > + > error = gfs2_acl_create(dip, inode); > if (error) > goto fail_gunlock2; > @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, > gfs2_trans_end(sdp); > /* Check if we reserved space in the rgrp. Function link_dinode may > not, depending on whether alloc is required. */ > - if (dip->i_res) > + if (gfs2_mb_reserved(dip)) > gfs2_inplace_release(dip); > gfs2_quota_unlock(dip); > gfs2_qadata_put(dip); > @@ -740,6 +749,7 @@ fail_gunlock: > iput(inode); > } > fail: > + gfs2_rs_delete(dip); Should we really be dropping the ip->i_res here I wonder? I'm not sure that we want to forget this information just because (for example) someone has tried to create a new inode and it has been failed for some reason or other. Otherwise I think this looks good, Steve. ^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 Patch][TRY 2] GFS2: Extend the life of the reservations structure 2012-05-11 9:36 ` Steven Whitehouse @ 2012-05-14 12:58 ` Bob Peterson 2012-05-14 13:03 ` Steven Whitehouse 0 siblings, 1 reply; 7+ messages in thread From: Bob Peterson @ 2012-05-14 12:58 UTC (permalink / raw) To: cluster-devel.redhat.com ----- Original Message ----- | > @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, | > struct dentry *dentry, | > gfs2_trans_end(sdp); | > /* Check if we reserved space in the rgrp. Function link_dinode | > may | > not, depending on whether alloc is required. */ | > - if (dip->i_res) | > + if (gfs2_mb_reserved(dip)) | > gfs2_inplace_release(dip); | > gfs2_quota_unlock(dip); | > gfs2_qadata_put(dip); | > @@ -740,6 +749,7 @@ fail_gunlock: | > iput(inode); | > } | > fail: | > + gfs2_rs_delete(dip); | | Should we really be dropping the ip->i_res here I wonder? I'm not | sure | that we want to forget this information just because (for example) | someone has tried to create a new inode and it has been failed for | some | reason or other. | | Otherwise I think this looks good, | | Steve. Hi, In this implementation, function gfs2_inplace_release just unlocks the rgrp glock and sets rs_requested to zero. In fact, the reservation still hangs around, attached to the inode until function gfs2_rs_delete is called from gfs2_release or gfs2_evict_inode. So instead of having two functions for allocation and deallocation, we now have four: gfs2_rs_alloc: kmem_cache_zalloc gfs2_inplace_reserve: hold rgrp glock, rs_requested = reservation needed gfs2_inplace_release: dq rgrp glock, rs_requested = 0 gfs2_rs_delete: kmem_cache_free I can change the names (suggestions welcome), because "gfs2_inplace_reserve" and its counterpart have changed their roles over the years. I'm just conditioned to look for those function names from years of conditioning. :) Something more like "get_local_rgrp" is a more accurate reflection of what's happening, but that name is already used. I've never liked how get_local_rgrp and gfs2_inplace_reserve both have retry loops. Since they're now pretty small functions, and one calls the other, perhaps I should combine them into one function by the name "get_local_rgrp"? And perhaps that should be done in a separate patch? Regards, Bob Peterson Red Hat File Systems ^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 Patch][TRY 2] GFS2: Extend the life of the reservations structure 2012-05-14 12:58 ` Bob Peterson @ 2012-05-14 13:03 ` Steven Whitehouse 2012-05-14 16:15 ` [Cluster-devel] [GFS2 Patch][TRY 3] " Bob Peterson 0 siblings, 1 reply; 7+ messages in thread From: Steven Whitehouse @ 2012-05-14 13:03 UTC (permalink / raw) To: cluster-devel.redhat.com Hi, On Mon, 2012-05-14 at 08:58 -0400, Bob Peterson wrote: > ----- Original Message ----- > | > @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, > | > struct dentry *dentry, > | > gfs2_trans_end(sdp); > | > /* Check if we reserved space in the rgrp. Function link_dinode > | > may > | > not, depending on whether alloc is required. */ > | > - if (dip->i_res) > | > + if (gfs2_mb_reserved(dip)) > | > gfs2_inplace_release(dip); > | > gfs2_quota_unlock(dip); > | > gfs2_qadata_put(dip); > | > @@ -740,6 +749,7 @@ fail_gunlock: > | > iput(inode); > | > } > | > fail: > | > + gfs2_rs_delete(dip); > | > | Should we really be dropping the ip->i_res here I wonder? I'm not > | sure > | that we want to forget this information just because (for example) > | someone has tried to create a new inode and it has been failed for > | some > | reason or other. > | > | Otherwise I think this looks good, > | > | Steve. > > Hi, > > In this implementation, function gfs2_inplace_release just unlocks the > rgrp glock and sets rs_requested to zero. In fact, the reservation still > hangs around, attached to the inode until function gfs2_rs_delete is called > from gfs2_release or gfs2_evict_inode. So instead of having two functions > for allocation and deallocation, we now have four: > That is all ok I think, but I'm referring to the call to gfs2_rs_delete() in this case. We should probably call the combined structure something like struct gfs2_alloc, since it deals with allocation and not just reservations. However, the issue here was dropping the reservation from the directory in the case that the allocation has failed for some reason (maybe permisions, or something like that) Steve. > gfs2_rs_alloc: kmem_cache_zalloc > gfs2_inplace_reserve: hold rgrp glock, rs_requested = reservation needed > gfs2_inplace_release: dq rgrp glock, rs_requested = 0 > gfs2_rs_delete: kmem_cache_free > > I can change the names (suggestions welcome), because "gfs2_inplace_reserve" > and its counterpart have changed their roles over the years. I'm just > conditioned to look for those function names from years of conditioning. :) > > Something more like "get_local_rgrp" is a more accurate reflection of > what's happening, but that name is already used. I've never liked how > get_local_rgrp and gfs2_inplace_reserve both have retry loops. > Since they're now pretty small functions, and one calls the other, perhaps > I should combine them into one function by the name "get_local_rgrp"? > And perhaps that should be done in a separate patch? > > Regards, > > Bob Peterson > Red Hat File Systems ^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 Patch][TRY 3] GFS2: Extend the life of the reservations structure 2012-05-14 13:03 ` Steven Whitehouse @ 2012-05-14 16:15 ` Bob Peterson 2012-05-15 11:41 ` Steven Whitehouse 0 siblings, 1 reply; 7+ messages in thread From: Bob Peterson @ 2012-05-14 16:15 UTC (permalink / raw) To: cluster-devel.redhat.com ----- Original Message ----- | Hi, | | On Mon, 2012-05-14 at 08:58 -0400, Bob Peterson wrote: | > | > fail: | > | > + gfs2_rs_delete(dip); | > | | > | Should we really be dropping the ip->i_res here I wonder? I'm not | > | sure | > | that we want to forget this information just because (for | > | example) | > | someone has tried to create a new inode and it has been failed | > | for | > | some | > | reason or other. | > | | > | Otherwise I think this looks good, | > | | > | Steve. | > | > Hi, | > | > In this implementation, function gfs2_inplace_release just unlocks | > the | > rgrp glock and sets rs_requested to zero. In fact, the reservation | > still | > hangs around, attached to the inode until function gfs2_rs_delete | > is called | > from gfs2_release or gfs2_evict_inode. So instead of having two | > functions | > for allocation and deallocation, we now have four: | > | That is all ok I think, but I'm referring to the call to | gfs2_rs_delete() in this case. We should probably call the combined | structure something like struct gfs2_alloc, since it deals with | allocation and not just reservations. However, the issue here was | dropping the reservation from the directory in the case that the | allocation has failed for some reason (maybe permisions, or something | like that) | | Steve. Hi, Ah, I misunderstood. I see your point now. Here's the revised version. Regards, Bob Peterson Red Hat File Systems Signed-off-by: Bob Peterson <rpeterso@redhat.com> --- Author: Bob Peterson <rpeterso@redhat.com> Date: Thu Apr 12 11:37:24 2012 -0500 GFS2: Extend the life of the reservations structure This patch lengthens the lifespan of the reservations structure for inodes. Before, they were allocated and deallocated for every write operation. With this patch, they are allocated when the first write occurs, and deallocated when the last process closes the file. It's more efficient to do it this way because it saves GFS2 a lot of unnecessary allocates and frees. It also gives us more flexibility for the future: (1) we can now fold the qadata structure back into the structure and save those alloc/frees, (2) we can use this for multi-block reservations. diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e80a464..aba77b5 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (ip->i_res) + if (gfs2_mb_reserved(ip)) gfs2_inplace_release(ip); if (qa) { gfs2_quota_unlock(ip); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 31b199f..3790617 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_file *fp; + struct gfs2_inode *ip = GFS2_I(inode); fp = file->private_data; file->private_data = NULL; + if ((file->f_mode & FMODE_WRITE) && ip->i_res && + (atomic_read(&inode->i_writecount) == 1)) + gfs2_rs_delete(ip); + if (gfs2_assert_warn(sdp, fp)) return -EIO; @@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct dentry *dentry = file->f_dentry; + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + int ret; + + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; if (file->f_flags & O_APPEND) { - struct dentry *dentry = file->f_dentry; - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_holder gh; - int ret; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) @@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a9ba244..ec7d93b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; + error = gfs2_rs_alloc(dip); + if (error) + return error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; @@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + /* the new inode needs a reservation so it can allocate xattrs. */ + error = gfs2_rs_alloc(GFS2_I(inode)); + if (error) + goto fail_gunlock2; + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_trans_end(sdp); /* Check if we reserved space in the rgrp. Function link_dinode may not, depending on whether alloc is required. */ - if (dip->i_res) + if (gfs2_mb_reserved(dip)) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_qadata_put(dip); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9b..e944fef 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ + int error = 0; + + down_write(&ip->i_rw_mutex); + if (!ip->i_res) { + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) + error = -ENOMEM; + } + up_write(&ip->i_rw_mutex); + return error; +} + +/** + * gfs2_rs_delete - delete a reservation + * @ip: The inode for this reservation + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_res) { + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + up_write(&ip->i_rw_mutex); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -993,22 +1026,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) } /** - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode - * @ip: the incore GFS2 inode structure - * - * Returns: the struct gfs2_qadata - */ - -static int gfs2_blkrsv_get(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res != NULL); - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - return -ENOMEM; - return 0; -} - -/** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data * @ip: the inode @@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) return -ENOSPC; } -static void gfs2_blkrsv_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res == NULL); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; -} - /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_blkreserv *rs; - int error; + int error = 0; u64 last_unlinked = NO_BLOCK; int tries = 0; - error = gfs2_blkrsv_get(ip); - if (error) - return error; - rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { @@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) out: if (error) - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; return error; } @@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; } /** @@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (ip->i_res == NULL) + if (ip->i_res->rs_requested == 0) return -ECANCELED; rgd = ip->i_rgd; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4b10f4..d9eda5f 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -43,6 +43,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); +extern int gfs2_rs_alloc(struct gfs2_inode *ip); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); @@ -68,4 +70,12 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); +/* This is how to tell if a reservation is "inplace" reserved: */ +static inline int gfs2_mb_reserved(struct gfs2_inode *ip) +{ + if (ip->i_res && ip->i_res->rs_requested) + return 1; + return 0; +} + #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6172fa7..a42df66 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1554,6 +1554,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); + gfs2_rs_delete(ip); end_writeback(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; @@ -1576,6 +1577,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip->i_flags = 0; ip->i_gl = NULL; ip->i_rgd = NULL; + ip->i_res = NULL; } return &ip->i_inode; } diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 125d457..41f42cd 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -31,7 +31,7 @@ struct gfs2_glock; static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) { const struct gfs2_blkreserv *rs = ip->i_res; - if (rs->rs_requested < ip->i_rgd->rd_length) + if (rs && rs->rs_requested < ip->i_rgd->rd_length) return rs->rs_requested + 1; return ip->i_rgd->rd_length; } ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 Patch][TRY 3] GFS2: Extend the life of the reservations structure 2012-05-14 16:15 ` [Cluster-devel] [GFS2 Patch][TRY 3] " Bob Peterson @ 2012-05-15 11:41 ` Steven Whitehouse 2012-05-18 11:00 ` [Cluster-devel] [GFS2 Patch][TRY 4] " Bob Peterson 0 siblings, 1 reply; 7+ messages in thread From: Steven Whitehouse @ 2012-05-15 11:41 UTC (permalink / raw) To: cluster-devel.redhat.com Hi, That doesn't appear to work for me, unfortunately: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<ffffffffa025c553>] gfs2_inplace_reserve+0x33/0x4b0 [gfs2] PGD 0 Oops: 0002 [#1] PREEMPT SMP CPU 5 Modules linked in: gfs2 ebtable_nat ebtables x_tables bridge stp dlm sctp af_packet bonding ipv6 ext3 jbd loop dm_mirror dm_region_hash dm_log bnx2 sg coretemp hwmon pcspkr microcode sr_mod cdrom ide_pci_generic ide_core ata_piix libata [last unloaded: scsi_wait_scan] Pid: 3075, comm: gfs2_quotad Not tainted 3.4.0-rc4+ #294 Dell Inc. PowerEdge R710/0N047H RIP: 0010:[<ffffffffa025c553>] [<ffffffffa025c553>] gfs2_inplace_reserve+0x33/0x4b0 [gfs2] RSP: 0018:ffff88031d989be0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff880322563680 RCX: 000000000000000c RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff880322563680 RBP: ffff88031d989ca0 R08: 0000000000000000 R09: 0000000000000000 R10: 200cbcfd7b800000 R11: 0000000000000000 R12: 0000000000000002 R13: 000000000000000f R14: ffff8801a5334610 R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff8801aa600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 0000000001c0b000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process gfs2_quotad (pid: 3075, threadinfo ffff88031d988000, task ffff880324b10540) Stack: 0000000000001000 0000000000000000 ffff88031d989c10 0000000000000002 ffff88031d989ca0 ffffffffa0239fdd 0000000000000020 ffffffffa0245106 ffff88031dfb3208 0000000000030273 0000000000001000 0000000000000000 Call Trace: [<ffffffffa0239fdd>] ? gfs2_write_alloc_required+0xbd/0x120 [gfs2] [<ffffffffa0245106>] ? gfs2_glock_wait+0x96/0xa0 [gfs2] [<ffffffffa025781a>] do_sync+0x23a/0x450 [gfs2] [<ffffffffa0257761>] ? do_sync+0x181/0x450 [gfs2] [<ffffffffa0257c9c>] gfs2_quota_sync+0x26c/0x360 [gfs2] [<ffffffffa0257d9b>] gfs2_quota_sync_timeo+0xb/0x10 [gfs2] [<ffffffffa0259ab0>] gfs2_quotad+0x220/0x2c0 [gfs2] [<ffffffff810a9c30>] ? wake_up_bit+0x40/0x40 [<ffffffffa0259890>] ? gfs2_wake_up_statfs+0x40/0x40 [gfs2] [<ffffffff810a9656>] kthread+0xa6/0xb0 [<ffffffff816cd434>] kernel_thread_helper+0x4/0x10 [<ffffffff810b6a77>] ? finish_task_switch+0x77/0x110 [<ffffffff816c4976>] ? _raw_spin_unlock_irq+0x46/0x70 [<ffffffff816c4cb4>] ? retint_restore_args+0x13/0x13 [<ffffffff810a95b0>] ? __init_kthread_worker+0x70/0x70 [<ffffffff816cd430>] ? gs_change+0x13/0x13 Code: 41 55 41 54 53 48 89 fb 48 81 ec 98 00 00 00 85 f6 48 8b 47 28 48 8b 80 b0 05 00 00 48 89 45 b0 48 8b 87 b8 04 00 00 48 89 45 98 <89> 30 0f 84 39 04 00 00 65 48 8b 14 25 c0 b9 00 00 48 c7 45 a8 RIP [<ffffffffa025c553>] gfs2_inplace_reserve+0x33/0x4b0 [gfs2] RSP <ffff88031d989be0> CR2: 0000000000000000 ---[ end trace 1fd6e3c548f2105d ]--- I suspect that quotad needs its own call to set up the new structure. I wonder if there are other places where we do internal writes which also need attention? Steve. On Mon, 2012-05-14 at 12:15 -0400, Bob Peterson wrote: > ----- Original Message ----- > | Hi, > | > | On Mon, 2012-05-14 at 08:58 -0400, Bob Peterson wrote: > | > | > fail: > | > | > + gfs2_rs_delete(dip); > | > | > | > | Should we really be dropping the ip->i_res here I wonder? I'm not > | > | sure > | > | that we want to forget this information just because (for > | > | example) > | > | someone has tried to create a new inode and it has been failed > | > | for > | > | some > | > | reason or other. > | > | > | > | Otherwise I think this looks good, > | > | > | > | Steve. > | > > | > Hi, > | > > | > In this implementation, function gfs2_inplace_release just unlocks > | > the > | > rgrp glock and sets rs_requested to zero. In fact, the reservation > | > still > | > hangs around, attached to the inode until function gfs2_rs_delete > | > is called > | > from gfs2_release or gfs2_evict_inode. So instead of having two > | > functions > | > for allocation and deallocation, we now have four: > | > > | That is all ok I think, but I'm referring to the call to > | gfs2_rs_delete() in this case. We should probably call the combined > | structure something like struct gfs2_alloc, since it deals with > | allocation and not just reservations. However, the issue here was > | dropping the reservation from the directory in the case that the > | allocation has failed for some reason (maybe permisions, or something > | like that) > | > | Steve. > > Hi, > > Ah, I misunderstood. I see your point now. Here's the revised version. > > Regards, > > Bob Peterson > Red Hat File Systems > > Signed-off-by: Bob Peterson <rpeterso@redhat.com> > --- > Author: Bob Peterson <rpeterso@redhat.com> > Date: Thu Apr 12 11:37:24 2012 -0500 > > GFS2: Extend the life of the reservations structure > > This patch lengthens the lifespan of the reservations structure for > inodes. Before, they were allocated and deallocated for every write > operation. With this patch, they are allocated when the first write > occurs, and deallocated when the last process closes the file. > It's more efficient to do it this way because it saves GFS2 a lot of > unnecessary allocates and frees. It also gives us more flexibility > for the future: (1) we can now fold the qadata structure back into > the structure and save those alloc/frees, (2) we can use this for > multi-block reservations. > > diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c > index e80a464..aba77b5 100644 > --- a/fs/gfs2/aops.c > +++ b/fs/gfs2/aops.c > @@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, > brelse(dibh); > failed: > gfs2_trans_end(sdp); > - if (ip->i_res) > + if (gfs2_mb_reserved(ip)) > gfs2_inplace_release(ip); > if (qa) { > gfs2_quota_unlock(ip); > diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c > index 31b199f..3790617 100644 > --- a/fs/gfs2/file.c > +++ b/fs/gfs2/file.c > @@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) > */ > vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); > > + ret = gfs2_rs_alloc(ip); > + if (ret) > + return ret; > + > gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); > ret = gfs2_glock_nq(&gh); > if (ret) > @@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file) > { > struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; > struct gfs2_file *fp; > + struct gfs2_inode *ip = GFS2_I(inode); > > fp = file->private_data; > file->private_data = NULL; > > + if ((file->f_mode & FMODE_WRITE) && ip->i_res && > + (atomic_read(&inode->i_writecount) == 1)) > + gfs2_rs_delete(ip); > + > if (gfs2_assert_warn(sdp, fp)) > return -EIO; > > @@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, > unsigned long nr_segs, loff_t pos) > { > struct file *file = iocb->ki_filp; > + struct dentry *dentry = file->f_dentry; > + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); > + int ret; > + > + ret = gfs2_rs_alloc(ip); > + if (ret) > + return ret; > > if (file->f_flags & O_APPEND) { > - struct dentry *dentry = file->f_dentry; > - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); > struct gfs2_holder gh; > - int ret; > > ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); > if (ret) > @@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, > if (bytes == 0) > bytes = sdp->sd_sb.sb_bsize; > > + error = gfs2_rs_alloc(ip); > + if (error) > + return error; > + > gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); > error = gfs2_glock_nq(&ip->i_gh); > if (unlikely(error)) > diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c > index a9ba244..ec7d93b 100644 > --- a/fs/gfs2/inode.c > +++ b/fs/gfs2/inode.c > @@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, > if (!name->len || name->len > GFS2_FNAMESIZE) > return -ENAMETOOLONG; > > + error = gfs2_rs_alloc(dip); > + if (error) > + return error; > + > error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); > if (error) > goto fail; > @@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, > if (error) > goto fail_gunlock2; > > + /* the new inode needs a reservation so it can allocate xattrs. */ > + error = gfs2_rs_alloc(GFS2_I(inode)); > + if (error) > + goto fail_gunlock2; > + > error = gfs2_acl_create(dip, inode); > if (error) > goto fail_gunlock2; > @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, > gfs2_trans_end(sdp); > /* Check if we reserved space in the rgrp. Function link_dinode may > not, depending on whether alloc is required. */ > - if (dip->i_res) > + if (gfs2_mb_reserved(dip)) > gfs2_inplace_release(dip); > gfs2_quota_unlock(dip); > gfs2_qadata_put(dip); > diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c > index f74fb9b..e944fef 100644 > --- a/fs/gfs2/rgrp.c > +++ b/fs/gfs2/rgrp.c > @@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) > } > } > > +/** > + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode > + * @ip: the inode for this reservation > + */ > +int gfs2_rs_alloc(struct gfs2_inode *ip) > +{ > + int error = 0; > + > + down_write(&ip->i_rw_mutex); > + if (!ip->i_res) { > + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); > + if (!ip->i_res) > + error = -ENOMEM; > + } > + up_write(&ip->i_rw_mutex); > + return error; > +} > + > +/** > + * gfs2_rs_delete - delete a reservation > + * @ip: The inode for this reservation > + * > + */ > +void gfs2_rs_delete(struct gfs2_inode *ip) > +{ > + down_write(&ip->i_rw_mutex); > + if (ip->i_res) { > + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); > + ip->i_res = NULL; > + } > + up_write(&ip->i_rw_mutex); > +} > + > void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) > { > struct rb_node *n; > @@ -993,22 +1026,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) > } > > /** > - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode > - * @ip: the incore GFS2 inode structure > - * > - * Returns: the struct gfs2_qadata > - */ > - > -static int gfs2_blkrsv_get(struct gfs2_inode *ip) > -{ > - BUG_ON(ip->i_res != NULL); > - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); > - if (!ip->i_res) > - return -ENOMEM; > - return 0; > -} > - > -/** > * try_rgrp_fit - See if a given reservation will fit in a given RG > * @rgd: the RG data > * @ip: the inode > @@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) > return -ENOSPC; > } > > -static void gfs2_blkrsv_put(struct gfs2_inode *ip) > -{ > - BUG_ON(ip->i_res == NULL); > - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); > - ip->i_res = NULL; > -} > - > /** > * gfs2_inplace_reserve - Reserve space in the filesystem > * @ip: the inode to reserve space for > @@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) > { > struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); > struct gfs2_blkreserv *rs; > - int error; > + int error = 0; > u64 last_unlinked = NO_BLOCK; > int tries = 0; > > - error = gfs2_blkrsv_get(ip); > - if (error) > - return error; > - > rs = ip->i_res; > rs->rs_requested = requested; > if (gfs2_assert_warn(sdp, requested)) { > @@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) > > out: > if (error) > - gfs2_blkrsv_put(ip); > + rs->rs_requested = 0; > return error; > } > > @@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) > > if (rs->rs_rgd_gh.gh_gl) > gfs2_glock_dq_uninit(&rs->rs_rgd_gh); > - gfs2_blkrsv_put(ip); > + rs->rs_requested = 0; > } > > /** > @@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, > /* Only happens if there is a bug in gfs2, return something distinctive > * to ensure that it is noticed. > */ > - if (ip->i_res == NULL) > + if (ip->i_res->rs_requested == 0) > return -ECANCELED; > > rgd = ip->i_rgd; > diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h > index b4b10f4..d9eda5f 100644 > --- a/fs/gfs2/rgrp.h > +++ b/fs/gfs2/rgrp.h > @@ -43,6 +43,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); > extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, > bool dinode, u64 *generation); > > +extern int gfs2_rs_alloc(struct gfs2_inode *ip); > +extern void gfs2_rs_delete(struct gfs2_inode *ip); > extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); > extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); > extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); > @@ -68,4 +70,12 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, > const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); > extern int gfs2_fitrim(struct file *filp, void __user *argp); > > +/* This is how to tell if a reservation is "inplace" reserved: */ > +static inline int gfs2_mb_reserved(struct gfs2_inode *ip) > +{ > + if (ip->i_res && ip->i_res->rs_requested) > + return 1; > + return 0; > +} > + > #endif /* __RGRP_DOT_H__ */ > diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c > index 6172fa7..a42df66 100644 > --- a/fs/gfs2/super.c > +++ b/fs/gfs2/super.c > @@ -1554,6 +1554,7 @@ out_unlock: > out: > /* Case 3 starts here */ > truncate_inode_pages(&inode->i_data, 0); > + gfs2_rs_delete(ip); > end_writeback(inode); > gfs2_dir_hash_inval(ip); > ip->i_gl->gl_object = NULL; > @@ -1576,6 +1577,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) > ip->i_flags = 0; > ip->i_gl = NULL; > ip->i_rgd = NULL; > + ip->i_res = NULL; > } > return &ip->i_inode; > } > diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h > index 125d457..41f42cd 100644 > --- a/fs/gfs2/trans.h > +++ b/fs/gfs2/trans.h > @@ -31,7 +31,7 @@ struct gfs2_glock; > static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) > { > const struct gfs2_blkreserv *rs = ip->i_res; > - if (rs->rs_requested < ip->i_rgd->rd_length) > + if (rs && rs->rs_requested < ip->i_rgd->rd_length) > return rs->rs_requested + 1; > return ip->i_rgd->rd_length; > } ^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 Patch][TRY 4] GFS2: Extend the life of the reservations structure 2012-05-15 11:41 ` Steven Whitehouse @ 2012-05-18 11:00 ` Bob Peterson 0 siblings, 0 replies; 7+ messages in thread From: Bob Peterson @ 2012-05-18 11:00 UTC (permalink / raw) To: cluster-devel.redhat.com Hi, Sorry for the problems encountered on try #3 of this patch. For Try #4, I've systematically gone through all the places where a reservation is necessary, and made sure one was created. I've also tested it with quotas enabled against postmark, fs_mark, and iozone. This patch lengthens the lifespan of the reservations structure for inodes. Before, they were allocated and deallocated for every write operation. With this patch, they are allocated when the first write occurs, and deallocated when the last process closes the file. It's more efficient to do it this way because it saves GFS2 a lot of unnecessary allocates and frees. It also gives us more flexibility for the future: (1) we can now fold the qadata structure back into the structure and save those alloc/frees, (2) we can use this for multi-block reservations. Regards, Bob Peterson Red Hat File Systems Signed-off-by: Bob Peterson <rpeterso@redhat.com> --- diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e80a464..aba77b5 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -878,7 +878,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (ip->i_res) + if (gfs2_mb_reserved(ip)) gfs2_inplace_release(ip); if (qa) { gfs2_quota_unlock(ip); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 31b199f..3790617 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -376,6 +376,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -569,10 +573,15 @@ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; struct gfs2_file *fp; + struct gfs2_inode *ip = GFS2_I(inode); fp = file->private_data; file->private_data = NULL; + if ((file->f_mode & FMODE_WRITE) && ip->i_res && + (atomic_read(&inode->i_writecount) == 1)) + gfs2_rs_delete(ip); + if (gfs2_assert_warn(sdp, fp)) return -EIO; @@ -653,12 +662,16 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct dentry *dentry = file->f_dentry; + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + int ret; + + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; if (file->f_flags & O_APPEND) { - struct dentry *dentry = file->f_dentry; - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_holder gh; - int ret; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) @@ -774,6 +787,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a9ba244..2a1b4b5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -667,6 +667,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; + error = gfs2_rs_alloc(dip); + if (error) + return error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; @@ -704,6 +708,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + /* the new inode needs a reservation so it can allocate xattrs. */ + error = gfs2_rs_alloc(GFS2_I(inode)); + if (error) + goto fail_gunlock2; + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; @@ -722,7 +731,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_trans_end(sdp); /* Check if we reserved space in the rgrp. Function link_dinode may not, depending on whether alloc is required. */ - if (dip->i_res) + if (gfs2_mb_reserved(dip)) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_qadata_put(dip); @@ -819,6 +828,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (S_ISDIR(inode->i_mode)) return -EPERM; + error = gfs2_rs_alloc(dip); + if (error) + return error; + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); @@ -1234,6 +1247,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) return error; + error = gfs2_rs_alloc(ndip); + if (error) + return error; + if (odip != ndip) { error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, &r_gh); @@ -1644,6 +1661,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) struct gfs2_holder i_gh; int error; + error = gfs2_rs_alloc(ip); + if (error) + return error; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) return error; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b97178e..197cc2d 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -764,6 +764,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); @@ -1549,10 +1553,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, if (error) return error; + error = gfs2_rs_alloc(ip); + if (error) + goto out_put; + mutex_lock(&ip->i_inode.i_mutex); error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh); if (error) - goto out_put; + goto out_unlockput; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) goto out_q; @@ -1609,8 +1617,9 @@ out_i: gfs2_glock_dq_uninit(&i_gh); out_q: gfs2_glock_dq_uninit(&q_gh); -out_put: +out_unlockput: mutex_unlock(&ip->i_inode.i_mutex); +out_put: qd_put(qd); return error; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9b..e944fef 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ + int error = 0; + + down_write(&ip->i_rw_mutex); + if (!ip->i_res) { + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) + error = -ENOMEM; + } + up_write(&ip->i_rw_mutex); + return error; +} + +/** + * gfs2_rs_delete - delete a reservation + * @ip: The inode for this reservation + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_res) { + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + up_write(&ip->i_rw_mutex); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -993,22 +1026,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) } /** - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode - * @ip: the incore GFS2 inode structure - * - * Returns: the struct gfs2_qadata - */ - -static int gfs2_blkrsv_get(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res != NULL); - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - return -ENOMEM; - return 0; -} - -/** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data * @ip: the inode @@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) return -ENOSPC; } -static void gfs2_blkrsv_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res == NULL); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; -} - /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_blkreserv *rs; - int error; + int error = 0; u64 last_unlinked = NO_BLOCK; int tries = 0; - error = gfs2_blkrsv_get(ip); - if (error) - return error; - rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { @@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) out: if (error) - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; return error; } @@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; } /** @@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (ip->i_res == NULL) + if (ip->i_res->rs_requested == 0) return -ECANCELED; rgd = ip->i_rgd; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4b10f4..d9eda5f 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -43,6 +43,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); +extern int gfs2_rs_alloc(struct gfs2_inode *ip); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); @@ -68,4 +70,12 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); +/* This is how to tell if a reservation is "inplace" reserved: */ +static inline int gfs2_mb_reserved(struct gfs2_inode *ip) +{ + if (ip->i_res && ip->i_res->rs_requested) + return 1; + return 0; +} + #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6172fa7..a42df66 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1554,6 +1554,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); + gfs2_rs_delete(ip); end_writeback(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; @@ -1576,6 +1577,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip->i_flags = 0; ip->i_gl = NULL; ip->i_rgd = NULL; + ip->i_res = NULL; } return &ip->i_inode; } diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 125d457..41f42cd 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -31,7 +31,7 @@ struct gfs2_glock; static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) { const struct gfs2_blkreserv *rs = ip->i_res; - if (rs->rs_requested < ip->i_rgd->rd_length) + if (rs && rs->rs_requested < ip->i_rgd->rd_length) return rs->rs_requested + 1; return ip->i_rgd->rd_length; } ^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2012-05-18 11:00 UTC | newest] Thread overview: 7+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- [not found] <873bd4a2-fac6-4347-b487-832e2f969344@zmail12.collab.prod.int.phx2.redhat.com> 2012-05-09 16:22 ` [Cluster-devel] [GFS2 Patch][TRY 2] GFS2: Extend the life of the reservations structure Bob Peterson 2012-05-11 9:36 ` Steven Whitehouse 2012-05-14 12:58 ` Bob Peterson 2012-05-14 13:03 ` Steven Whitehouse 2012-05-14 16:15 ` [Cluster-devel] [GFS2 Patch][TRY 3] " Bob Peterson 2012-05-15 11:41 ` Steven Whitehouse 2012-05-18 11:00 ` [Cluster-devel] [GFS2 Patch][TRY 4] " Bob Peterson
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).