All of lore.kernel.org
 help / color / mirror / Atom feed
* [Cluster-devel] [GFS2 PATCH] GFS2: Use reference count for multi-block reservations
       [not found] <970678731.38907530.1436972414921.JavaMail.zimbra@redhat.com>
@ 2015-07-15 15:02 ` Bob Peterson
  2015-07-16  8:56   ` Steven Whitehouse
  0 siblings, 1 reply; 3+ messages in thread
From: Bob Peterson @ 2015-07-15 15:02 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

This patch changes the scheme for multi-block reservations so that
it uses a reference count rather than relying upon the last closer
to delete it. This is necessary because multiple operations can
require a reservation structure that don't involved file open/close,
i.e. inode operations. We need to ensure that the last file closer
doesn't delete the reservation structure while it's being used by
an inode operation.

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 61296ec..3daf05b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1291,15 +1291,11 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 	if (ret)
 		return ret;
 
-	ret = get_write_access(inode);
-	if (ret)
-		return ret;
-
 	inode_dio_wait(inode);
 
-	ret = gfs2_rs_alloc(ip);
+	ret = gfs2_rs_get(ip);
 	if (ret)
-		goto out;
+		return ret;
 
 	oldsize = inode->i_size;
 	if (newsize >= oldsize) {
@@ -1307,10 +1303,9 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 		goto out;
 	}
 
-	gfs2_rs_deltree(ip->i_res);
 	ret = do_shrink(inode, oldsize, newsize);
 out:
-	put_write_access(inode);
+	gfs2_rs_put(GFS2_I(inode));
 	return ret;
 }
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index cf4ab89..0c3a69b 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -397,14 +397,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* Update file times before taking page lock */
 	file_update_time(vma->vm_file);
 
-	ret = get_write_access(inode);
+	ret = gfs2_rs_get(ip);
 	if (ret)
 		goto out;
 
-	ret = gfs2_rs_alloc(ip);
-	if (ret)
-		goto out_write_access;
-
 	gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -486,8 +482,7 @@ out_uninit:
 		set_page_dirty(page);
 		wait_for_stable_page(page);
 	}
-out_write_access:
-	put_write_access(inode);
+	gfs2_rs_put(ip);
 out:
 	sb_end_pagefault(inode->i_sb);
 	return block_page_mkwrite_return(ret);
@@ -602,6 +597,9 @@ static int gfs2_open(struct inode *inode, struct file *file)
 	if (need_unlock)
 		gfs2_glock_dq_uninit(&i_gh);
 
+	if (error == 0 && file->f_mode & FMODE_WRITE)
+		error = gfs2_rs_get(ip);
+
 	return error;
 }
 
@@ -620,10 +618,9 @@ static int gfs2_release(struct inode *inode, struct file *file)
 	kfree(file->private_data);
 	file->private_data = NULL;
 
-	if (!(file->f_mode & FMODE_WRITE))
-		return 0;
+	if (file->f_mode & FMODE_WRITE)
+		gfs2_rs_put(ip);
 
-	gfs2_rs_delete(ip, &inode->i_writecount);
 	return 0;
 }
 
@@ -703,7 +700,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct gfs2_inode *ip = GFS2_I(file_inode(file));
 	int ret;
 
-	ret = gfs2_rs_alloc(ip);
+	ret = gfs2_rs_get(ip);
 	if (ret)
 		return ret;
 
@@ -714,11 +711,14 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 		if (ret)
-			return ret;
+			goto out;
 		gfs2_glock_dq_uninit(&gh);
 	}
 
-	return generic_file_write_iter(iocb, from);
+	ret = generic_file_write_iter(iocb, from);
+out:
+	gfs2_rs_put(ip);
+	return ret;
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
@@ -934,19 +934,11 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
 			goto out_unlock;
 	}
 
-	ret = get_write_access(inode);
-	if (ret)
-		goto out_unlock;
-
-	ret = gfs2_rs_alloc(ip);
-	if (ret)
-		goto out_putw;
-
-	ret = __gfs2_fallocate(file, mode, offset, len);
-	if (ret)
-		gfs2_rs_deltree(ip->i_res);
-out_putw:
-	put_write_access(inode);
+	ret = gfs2_rs_get(ip);
+	if (ret == 0) {
+		ret = __gfs2_fallocate(file, mode, offset, len);
+		gfs2_rs_put(ip);
+	}
 out_unlock:
 	gfs2_glock_dq(&gh);
 out_uninit:
@@ -962,13 +954,15 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
 	int error;
 	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
 
-	error = gfs2_rs_alloc(ip);
+	error = gfs2_rs_get(ip);
 	if (error)
 		return (ssize_t)error;
 
 	gfs2_size_hint(out, *ppos, len);
 
-	return iter_file_splice_write(pipe, out, ppos, len, flags);
+	error = iter_file_splice_write(pipe, out, ppos, len, flags);
+	gfs2_rs_put(ip);
+	return (ssize_t)error;
 }
 
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e300f74..f7603e1 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -288,6 +288,7 @@ struct gfs2_blkreserv {
 	struct gfs2_rbm rs_rbm;       /* Start of reservation */
 	u32 rs_free;                  /* how many blocks are still free */
 	u64 rs_inum;                  /* Inode number for reservation */
+	atomic_t rs_ref;              /* References to the reservation */
 
 	/* ancillary quota stuff */
 	struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 063fdfc..7c58d25 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -601,13 +601,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
-	error = gfs2_rs_alloc(dip);
+	error = gfs2_rs_get(dip);
 	if (error)
 		return error;
 
 	error = gfs2_rindex_update(sdp);
-	if (error)
+	if (error) {
+		gfs2_rs_put(dip);
 		return error;
+	}
 
 	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	if (error)
@@ -634,6 +636,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 				error = finish_no_open(file, NULL);
 		}
 		gfs2_glock_dq_uninit(ghs);
+		gfs2_rs_put(dip);
 		return error;
 	} else if (error != -ENOENT) {
 		goto fail_gunlock;
@@ -653,7 +656,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		goto fail_free_vfs_inode;
 
 	ip = GFS2_I(inode);
-	error = gfs2_rs_alloc(ip);
+	error = gfs2_rs_get(ip);
 	if (error)
 		goto fail_free_acls;
 
@@ -696,18 +699,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 
 	error = alloc_dinode(ip, aflags, &blocks);
 	if (error)
-		goto fail_free_inode;
+		goto fail_rs_put;
 
 	gfs2_set_inode_blocks(inode, blocks);
 
 	error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
 	if (error)
-		goto fail_free_inode;
+		goto fail_rs_put;
 
 	ip->i_gl->gl_object = ip;
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
 	if (error)
-		goto fail_free_inode;
+		goto fail_rs_put;
 
 	error = gfs2_trans_begin(sdp, blocks, 0);
 	if (error)
@@ -763,20 +766,22 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	}
 	gfs2_glock_dq_uninit(ghs);
 	gfs2_glock_dq_uninit(ghs + 1);
+	gfs2_rs_put(dip);
 	return error;
 
 fail_gunlock3:
 	gfs2_glock_dq_uninit(ghs + 1);
+	gfs2_rs_put(ip);
 	if (ip->i_gl)
 		gfs2_glock_put(ip->i_gl);
 	goto fail_gunlock;
 
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
-fail_free_inode:
+fail_rs_put:
+	gfs2_rs_put(ip);
 	if (ip->i_gl)
 		gfs2_glock_put(ip->i_gl);
-	gfs2_rs_delete(ip, NULL);
 fail_free_acls:
 	if (default_acl)
 		posix_acl_release(default_acl);
@@ -796,6 +801,7 @@ fail_gunlock:
 		iput(inode);
 	}
 fail:
+	gfs2_rs_put(dip);
 	return error;
 }
 
@@ -898,7 +904,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	error = gfs2_rs_alloc(dip);
+	error = gfs2_rs_get(dip);
 	if (error)
 		return error;
 
@@ -1003,6 +1009,7 @@ out_child:
 out_parent:
 	gfs2_holder_uninit(ghs);
 	gfs2_holder_uninit(ghs + 1);
+	gfs2_rs_put(dip);
 	return error;
 }
 
@@ -1371,7 +1378,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (error)
 		return error;
 
-	error = gfs2_rs_alloc(ndip);
+	error = gfs2_rs_get(ndip);
 	if (error)
 		return error;
 
@@ -1553,6 +1560,7 @@ out_gunlock_r:
 	if (r_gh.gh_gl)
 		gfs2_glock_dq_uninit(&r_gh);
 out:
+	gfs2_rs_put(ndip);
 	return error;
 }
 
@@ -1854,21 +1862,17 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
 		ogid = ngid = NO_GID_QUOTA_CHANGE;
 
-	error = get_write_access(inode);
-	if (error)
-		return error;
-
-	error = gfs2_rs_alloc(ip);
+	error = gfs2_rs_get(ip);
 	if (error)
 		goto out;
 
 	error = gfs2_rindex_update(sdp);
 	if (error)
-		goto out;
+		goto out_put;
 
 	error = gfs2_quota_lock(ip, nuid, ngid);
 	if (error)
-		goto out;
+		goto out_put;
 
 	ap.target = gfs2_get_inode_blocks(&ip->i_inode);
 
@@ -1897,8 +1901,9 @@ out_end_trans:
 	gfs2_trans_end(sdp);
 out_gunlock_q:
 	gfs2_quota_unlock(ip);
+out_put:
+	gfs2_rs_put(ip);
 out:
-	put_write_access(inode);
 	return error;
 }
 
@@ -1920,21 +1925,21 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct gfs2_holder i_gh;
 	int error;
 
-	error = gfs2_rs_alloc(ip);
+	error = gfs2_rs_get(ip);
 	if (error)
 		return error;
 
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
-		return error;
+		goto out;
 
 	error = -EPERM;
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		goto out;
+		goto out_dq;
 
 	error = inode_change_ok(inode, attr);
 	if (error)
-		goto out;
+		goto out_dq;
 
 	if (attr->ia_valid & ATTR_SIZE)
 		error = gfs2_setattr_size(inode, attr->ia_size);
@@ -1946,10 +1951,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 			error = posix_acl_chmod(inode, inode->i_mode);
 	}
 
-out:
+out_dq:
 	if (!error)
 		mark_inode_dirty(inode);
 	gfs2_glock_dq_uninit(&i_gh);
+out:
+	gfs2_rs_put(ip);
 	return error;
 }
 
@@ -2002,9 +2009,10 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret == 0) {
-		ret = gfs2_rs_alloc(ip);
+		ret = gfs2_rs_get(ip);
 		if (ret == 0)
 			ret = generic_setxattr(dentry, name, data, size, flags);
+		gfs2_rs_put(ip);
 		gfs2_glock_dq(&gh);
 	}
 	gfs2_holder_uninit(&gh);
@@ -2043,9 +2051,11 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret == 0) {
-		ret = gfs2_rs_alloc(ip);
-		if (ret == 0)
+		ret = gfs2_rs_get(ip);
+		if (ret == 0) {
 			ret = generic_removexattr(dentry, name);
+			gfs2_rs_put(ip);
+		}
 		gfs2_glock_dq(&gh);
 	}
 	gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3a31226..c52eab1 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -533,20 +533,22 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 	struct gfs2_quota_data **qd;
 	int error;
 
-	if (ip->i_res == NULL) {
-		error = gfs2_rs_alloc(ip);
-		if (error)
-			return error;
-	}
+	error = gfs2_rs_get(ip);
+	if (error)
+		return error;
 
 	qd = ip->i_res->rs_qa_qd;
 
 	if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
-	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
-		return -EIO;
+	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) {
+		error = -EIO;
+		goto out_err;
+	}
 
-	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
-		return 0;
+	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) {
+		error = 0;
+		goto out_err;
+	}
 
 	error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
 	if (error)
@@ -581,6 +583,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 out:
 	if (error)
 		gfs2_quota_unhold(ip);
+out_err:
 	return error;
 }
 
@@ -598,6 +601,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip)
 		ip->i_res->rs_qa_qd[x] = NULL;
 	}
 	ip->i_res->rs_qa_qd_num = 0;
+	gfs2_rs_put(ip);
 }
 
 static int sort_qd(const void *a, const void *b)
@@ -843,7 +847,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	unsigned int nalloc = 0, blocks;
 	int error;
 
-	error = gfs2_rs_alloc(ip);
+	error = gfs2_rs_get(ip);
 	if (error)
 		return error;
 
@@ -851,8 +855,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 			      &data_blocks, &ind_blocks);
 
 	ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
-	if (!ghs)
-		return -ENOMEM;
+	if (!ghs) {
+		error = -ENOMEM;
+		goto out_err;
+	}
 
 	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
 	mutex_lock(&ip->i_inode.i_mutex);
@@ -923,6 +929,8 @@ out:
 	mutex_unlock(&ip->i_inode.i_mutex);
 	kfree(ghs);
 	gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
+out_err:
+	gfs2_rs_put(ip);
 	return error;
 }
 
@@ -1635,7 +1643,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
 	if (error)
 		return error;
 
-	error = gfs2_rs_alloc(ip);
+	error = gfs2_rs_get(ip);
 	if (error)
 		goto out_put;
 
@@ -1705,6 +1713,7 @@ out_q:
 	gfs2_glock_dq_uninit(&q_gh);
 out_unlockput:
 	mutex_unlock(&ip->i_inode.i_mutex);
+	gfs2_rs_put(ip);
 out_put:
 	qd_put(qd);
 	return error;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c92ae7fd..89e9b66 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -596,24 +596,27 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 }
 
 /**
- * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
+ * gfs2_rs_get - make sure we have a reservation assigned to the inode
  * @ip: the inode for this reservation
  */
-int gfs2_rs_alloc(struct gfs2_inode *ip)
+int gfs2_rs_get(struct gfs2_inode *ip)
 {
 	int error = 0;
 
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_res)
-		goto out;
+		goto out_incr;
 
 	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
 	if (!ip->i_res) {
 		error = -ENOMEM;
 		goto out;
 	}
+	atomic_set(&ip->i_res->rs_ref, 0);
 
 	RB_CLEAR_NODE(&ip->i_res->rs_node);
+out_incr:
+	atomic_inc(&ip->i_res->rs_ref);
 out:
 	up_write(&ip->i_rw_mutex);
 	return error;
@@ -621,10 +624,11 @@ out:
 
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 {
-	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
+	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u r:%u\n",
 		       (unsigned long long)rs->rs_inum,
 		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
-		       rs->rs_rbm.offset, rs->rs_free);
+		       rs->rs_rbm.offset, rs->rs_free,
+		       atomic_read(&rs->rs_ref));
 }
 
 /**
@@ -678,15 +682,15 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 }
 
 /**
- * gfs2_rs_delete - delete a multi-block reservation
+ * gfs2_rs_put - put a multi-block reservation
  * @ip: The inode for this reservation
  * @wcount: The inode's write count, or NULL
  *
  */
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_put(struct gfs2_inode *ip)
 {
 	down_write(&ip->i_rw_mutex);
-	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
+	if (ip->i_res && atomic_dec_and_test(&ip->i_res->rs_ref)) {
 		gfs2_rs_deltree(ip->i_res);
 		BUG_ON(ip->i_res->rs_free);
 		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index c0ab33f..51cce86 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -49,9 +49,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
-extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern int gfs2_rs_get(struct gfs2_inode *ip);
 extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_put(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2982445..b4d12a1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1607,7 +1607,8 @@ out_unlock:
 out:
 	/* Case 3 starts here */
 	truncate_inode_pages_final(&inode->i_data);
-	gfs2_rs_delete(ip, NULL);
+	gfs2_rs_put(ip);
+	BUG_ON(ip->i_res);
 	gfs2_ordered_del_inode(ip);
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index fff47d0..811a809 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -32,13 +32,13 @@
 			    { GFS2_BLKST_DINODE, "dinode" },	\
 			    { GFS2_BLKST_UNLINKED, "unlinked" })
 
-#define TRACE_RS_DELETE  0
+#define TRACE_RS_PUT     0
 #define TRACE_RS_TREEDEL 1
 #define TRACE_RS_INSERT  2
 #define TRACE_RS_CLAIM   3
 
 #define rs_func_name(x) __print_symbolic(x,	\
-					 { 0, "del " },	\
+					 { 0, "put " },	\
 					 { 1, "tdel" },	\
 					 { 2, "ins " },	\
 					 { 3, "clm " })
@@ -524,6 +524,7 @@ TRACE_EVENT(gfs2_rs,
 		__field(	u64,	inum			)
 		__field(	u64,	start			)
 		__field(	u32,	free			)
+		__field(	u32,	ref			)
 		__field(	u8,	func			)
 	),
 
@@ -535,17 +536,20 @@ TRACE_EVENT(gfs2_rs,
 		__entry->inum		= rs->rs_inum;
 		__entry->start		= gfs2_rbm_to_block(&rs->rs_rbm);
 		__entry->free		= rs->rs_free;
+		__entry->ref		= atomic_read(&rs->rs_ref);
 		__entry->func		= func;
 	),
 
-	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
+	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu "
+		  "r:%lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->rd_addr,
 		  (unsigned long)__entry->rd_free_clone,
 		  (unsigned long)__entry->rd_reserved,
-		  rs_func_name(__entry->func), (unsigned long)__entry->free)
+		  rs_func_name(__entry->func), (unsigned long)__entry->free,
+		  (unsigned long)__entry->ref)
 );
 
 #endif /* _TRACE_GFS2_H */



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Use reference count for multi-block reservations
  2015-07-15 15:02 ` [Cluster-devel] [GFS2 PATCH] GFS2: Use reference count for multi-block reservations Bob Peterson
@ 2015-07-16  8:56   ` Steven Whitehouse
  2015-07-17 13:21     ` [Cluster-devel] [GFS2 PATCH] GFS2: Make rgrp reservations part of the gfs2_inode structure Bob Peterson
  0 siblings, 1 reply; 3+ messages in thread
From: Steven Whitehouse @ 2015-07-16  8:56 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On 15/07/15 16:02, Bob Peterson wrote:
> Hi,
>
> This patch changes the scheme for multi-block reservations so that
> it uses a reference count rather than relying upon the last closer
> to delete it. This is necessary because multiple operations can
> require a reservation structure that don't involved file open/close,
> i.e. inode operations. We need to ensure that the last file closer
> doesn't delete the reservation structure while it's being used by
> an inode operation.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
Having to add a reference count does seem a bit heavy weight here. We 
could go back to the scheme of simply including the reservation 
structure in the inode to make it simpler, at the expense of more memory 
usage for inodes that are not actively being written to. Either way I'd 
prefer to avoid using an atomic ref count that has to be tweeked on 
every write, since that will likely be an issue with multi-threaded 
workloads, which was why we avoided going down that route in the first 
place,

Steve.

> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 61296ec..3daf05b 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -1291,15 +1291,11 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
>   	if (ret)
>   		return ret;
>   
> -	ret = get_write_access(inode);
> -	if (ret)
> -		return ret;
> -
>   	inode_dio_wait(inode);
>   
> -	ret = gfs2_rs_alloc(ip);
> +	ret = gfs2_rs_get(ip);
>   	if (ret)
> -		goto out;
> +		return ret;
>   
>   	oldsize = inode->i_size;
>   	if (newsize >= oldsize) {
> @@ -1307,10 +1303,9 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
>   		goto out;
>   	}
>   
> -	gfs2_rs_deltree(ip->i_res);
>   	ret = do_shrink(inode, oldsize, newsize);
>   out:
> -	put_write_access(inode);
> +	gfs2_rs_put(GFS2_I(inode));
>   	return ret;
>   }
>   
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index cf4ab89..0c3a69b 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -397,14 +397,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
>   	/* Update file times before taking page lock */
>   	file_update_time(vma->vm_file);
>   
> -	ret = get_write_access(inode);
> +	ret = gfs2_rs_get(ip);
>   	if (ret)
>   		goto out;
>   
> -	ret = gfs2_rs_alloc(ip);
> -	if (ret)
> -		goto out_write_access;
> -
>   	gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
>   
>   	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
> @@ -486,8 +482,7 @@ out_uninit:
>   		set_page_dirty(page);
>   		wait_for_stable_page(page);
>   	}
> -out_write_access:
> -	put_write_access(inode);
> +	gfs2_rs_put(ip);
>   out:
>   	sb_end_pagefault(inode->i_sb);
>   	return block_page_mkwrite_return(ret);
> @@ -602,6 +597,9 @@ static int gfs2_open(struct inode *inode, struct file *file)
>   	if (need_unlock)
>   		gfs2_glock_dq_uninit(&i_gh);
>   
> +	if (error == 0 && file->f_mode & FMODE_WRITE)
> +		error = gfs2_rs_get(ip);
> +
>   	return error;
>   }
>   
> @@ -620,10 +618,9 @@ static int gfs2_release(struct inode *inode, struct file *file)
>   	kfree(file->private_data);
>   	file->private_data = NULL;
>   
> -	if (!(file->f_mode & FMODE_WRITE))
> -		return 0;
> +	if (file->f_mode & FMODE_WRITE)
> +		gfs2_rs_put(ip);
>   
> -	gfs2_rs_delete(ip, &inode->i_writecount);
>   	return 0;
>   }
>   
> @@ -703,7 +700,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   	struct gfs2_inode *ip = GFS2_I(file_inode(file));
>   	int ret;
>   
> -	ret = gfs2_rs_alloc(ip);
> +	ret = gfs2_rs_get(ip);
>   	if (ret)
>   		return ret;
>   
> @@ -714,11 +711,14 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   
>   		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
>   		if (ret)
> -			return ret;
> +			goto out;
>   		gfs2_glock_dq_uninit(&gh);
>   	}
>   
> -	return generic_file_write_iter(iocb, from);
> +	ret = generic_file_write_iter(iocb, from);
> +out:
> +	gfs2_rs_put(ip);
> +	return ret;
>   }
>   
>   static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
> @@ -934,19 +934,11 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
>   			goto out_unlock;
>   	}
>   
> -	ret = get_write_access(inode);
> -	if (ret)
> -		goto out_unlock;
> -
> -	ret = gfs2_rs_alloc(ip);
> -	if (ret)
> -		goto out_putw;
> -
> -	ret = __gfs2_fallocate(file, mode, offset, len);
> -	if (ret)
> -		gfs2_rs_deltree(ip->i_res);
> -out_putw:
> -	put_write_access(inode);
> +	ret = gfs2_rs_get(ip);
> +	if (ret == 0) {
> +		ret = __gfs2_fallocate(file, mode, offset, len);
> +		gfs2_rs_put(ip);
> +	}
>   out_unlock:
>   	gfs2_glock_dq(&gh);
>   out_uninit:
> @@ -962,13 +954,15 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
>   	int error;
>   	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
>   
> -	error = gfs2_rs_alloc(ip);
> +	error = gfs2_rs_get(ip);
>   	if (error)
>   		return (ssize_t)error;
>   
>   	gfs2_size_hint(out, *ppos, len);
>   
> -	return iter_file_splice_write(pipe, out, ppos, len, flags);
> +	error = iter_file_splice_write(pipe, out, ppos, len, flags);
> +	gfs2_rs_put(ip);
> +	return (ssize_t)error;
>   }
>   
>   #ifdef CONFIG_GFS2_FS_LOCKING_DLM
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index e300f74..f7603e1 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -288,6 +288,7 @@ struct gfs2_blkreserv {
>   	struct gfs2_rbm rs_rbm;       /* Start of reservation */
>   	u32 rs_free;                  /* how many blocks are still free */
>   	u64 rs_inum;                  /* Inode number for reservation */
> +	atomic_t rs_ref;              /* References to the reservation */
>   
>   	/* ancillary quota stuff */
>   	struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS];
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 063fdfc..7c58d25 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -601,13 +601,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>   	if (!name->len || name->len > GFS2_FNAMESIZE)
>   		return -ENAMETOOLONG;
>   
> -	error = gfs2_rs_alloc(dip);
> +	error = gfs2_rs_get(dip);
>   	if (error)
>   		return error;
>   
>   	error = gfs2_rindex_update(sdp);
> -	if (error)
> +	if (error) {
> +		gfs2_rs_put(dip);
>   		return error;
> +	}
>   
>   	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
>   	if (error)
> @@ -634,6 +636,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>   				error = finish_no_open(file, NULL);
>   		}
>   		gfs2_glock_dq_uninit(ghs);
> +		gfs2_rs_put(dip);
>   		return error;
>   	} else if (error != -ENOENT) {
>   		goto fail_gunlock;
> @@ -653,7 +656,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>   		goto fail_free_vfs_inode;
>   
>   	ip = GFS2_I(inode);
> -	error = gfs2_rs_alloc(ip);
> +	error = gfs2_rs_get(ip);
>   	if (error)
>   		goto fail_free_acls;
>   
> @@ -696,18 +699,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>   
>   	error = alloc_dinode(ip, aflags, &blocks);
>   	if (error)
> -		goto fail_free_inode;
> +		goto fail_rs_put;
>   
>   	gfs2_set_inode_blocks(inode, blocks);
>   
>   	error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
>   	if (error)
> -		goto fail_free_inode;
> +		goto fail_rs_put;
>   
>   	ip->i_gl->gl_object = ip;
>   	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
>   	if (error)
> -		goto fail_free_inode;
> +		goto fail_rs_put;
>   
>   	error = gfs2_trans_begin(sdp, blocks, 0);
>   	if (error)
> @@ -763,20 +766,22 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
>   	}
>   	gfs2_glock_dq_uninit(ghs);
>   	gfs2_glock_dq_uninit(ghs + 1);
> +	gfs2_rs_put(dip);
>   	return error;
>   
>   fail_gunlock3:
>   	gfs2_glock_dq_uninit(ghs + 1);
> +	gfs2_rs_put(ip);
>   	if (ip->i_gl)
>   		gfs2_glock_put(ip->i_gl);
>   	goto fail_gunlock;
>   
>   fail_gunlock2:
>   	gfs2_glock_dq_uninit(ghs + 1);
> -fail_free_inode:
> +fail_rs_put:
> +	gfs2_rs_put(ip);
>   	if (ip->i_gl)
>   		gfs2_glock_put(ip->i_gl);
> -	gfs2_rs_delete(ip, NULL);
>   fail_free_acls:
>   	if (default_acl)
>   		posix_acl_release(default_acl);
> @@ -796,6 +801,7 @@ fail_gunlock:
>   		iput(inode);
>   	}
>   fail:
> +	gfs2_rs_put(dip);
>   	return error;
>   }
>   
> @@ -898,7 +904,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
>   	if (S_ISDIR(inode->i_mode))
>   		return -EPERM;
>   
> -	error = gfs2_rs_alloc(dip);
> +	error = gfs2_rs_get(dip);
>   	if (error)
>   		return error;
>   
> @@ -1003,6 +1009,7 @@ out_child:
>   out_parent:
>   	gfs2_holder_uninit(ghs);
>   	gfs2_holder_uninit(ghs + 1);
> +	gfs2_rs_put(dip);
>   	return error;
>   }
>   
> @@ -1371,7 +1378,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
>   	if (error)
>   		return error;
>   
> -	error = gfs2_rs_alloc(ndip);
> +	error = gfs2_rs_get(ndip);
>   	if (error)
>   		return error;
>   
> @@ -1553,6 +1560,7 @@ out_gunlock_r:
>   	if (r_gh.gh_gl)
>   		gfs2_glock_dq_uninit(&r_gh);
>   out:
> +	gfs2_rs_put(ndip);
>   	return error;
>   }
>   
> @@ -1854,21 +1862,17 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
>   	if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
>   		ogid = ngid = NO_GID_QUOTA_CHANGE;
>   
> -	error = get_write_access(inode);
> -	if (error)
> -		return error;
> -
> -	error = gfs2_rs_alloc(ip);
> +	error = gfs2_rs_get(ip);
>   	if (error)
>   		goto out;
>   
>   	error = gfs2_rindex_update(sdp);
>   	if (error)
> -		goto out;
> +		goto out_put;
>   
>   	error = gfs2_quota_lock(ip, nuid, ngid);
>   	if (error)
> -		goto out;
> +		goto out_put;
>   
>   	ap.target = gfs2_get_inode_blocks(&ip->i_inode);
>   
> @@ -1897,8 +1901,9 @@ out_end_trans:
>   	gfs2_trans_end(sdp);
>   out_gunlock_q:
>   	gfs2_quota_unlock(ip);
> +out_put:
> +	gfs2_rs_put(ip);
>   out:
> -	put_write_access(inode);
>   	return error;
>   }
>   
> @@ -1920,21 +1925,21 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
>   	struct gfs2_holder i_gh;
>   	int error;
>   
> -	error = gfs2_rs_alloc(ip);
> +	error = gfs2_rs_get(ip);
>   	if (error)
>   		return error;
>   
>   	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
>   	if (error)
> -		return error;
> +		goto out;
>   
>   	error = -EPERM;
>   	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
> -		goto out;
> +		goto out_dq;
>   
>   	error = inode_change_ok(inode, attr);
>   	if (error)
> -		goto out;
> +		goto out_dq;
>   
>   	if (attr->ia_valid & ATTR_SIZE)
>   		error = gfs2_setattr_size(inode, attr->ia_size);
> @@ -1946,10 +1951,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
>   			error = posix_acl_chmod(inode, inode->i_mode);
>   	}
>   
> -out:
> +out_dq:
>   	if (!error)
>   		mark_inode_dirty(inode);
>   	gfs2_glock_dq_uninit(&i_gh);
> +out:
> +	gfs2_rs_put(ip);
>   	return error;
>   }
>   
> @@ -2002,9 +2009,10 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
>   	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
>   	ret = gfs2_glock_nq(&gh);
>   	if (ret == 0) {
> -		ret = gfs2_rs_alloc(ip);
> +		ret = gfs2_rs_get(ip);
>   		if (ret == 0)
>   			ret = generic_setxattr(dentry, name, data, size, flags);
> +		gfs2_rs_put(ip);
>   		gfs2_glock_dq(&gh);
>   	}
>   	gfs2_holder_uninit(&gh);
> @@ -2043,9 +2051,11 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
>   	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
>   	ret = gfs2_glock_nq(&gh);
>   	if (ret == 0) {
> -		ret = gfs2_rs_alloc(ip);
> -		if (ret == 0)
> +		ret = gfs2_rs_get(ip);
> +		if (ret == 0) {
>   			ret = generic_removexattr(dentry, name);
> +			gfs2_rs_put(ip);
> +		}
>   		gfs2_glock_dq(&gh);
>   	}
>   	gfs2_holder_uninit(&gh);
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 3a31226..c52eab1 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -533,20 +533,22 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
>   	struct gfs2_quota_data **qd;
>   	int error;
>   
> -	if (ip->i_res == NULL) {
> -		error = gfs2_rs_alloc(ip);
> -		if (error)
> -			return error;
> -	}
> +	error = gfs2_rs_get(ip);
> +	if (error)
> +		return error;
>   
>   	qd = ip->i_res->rs_qa_qd;
>   
>   	if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
> -	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
> -		return -EIO;
> +	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) {
> +		error = -EIO;
> +		goto out_err;
> +	}
>   
> -	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
> -		return 0;
> +	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) {
> +		error = 0;
> +		goto out_err;
> +	}
>   
>   	error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
>   	if (error)
> @@ -581,6 +583,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
>   out:
>   	if (error)
>   		gfs2_quota_unhold(ip);
> +out_err:
>   	return error;
>   }
>   
> @@ -598,6 +601,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip)
>   		ip->i_res->rs_qa_qd[x] = NULL;
>   	}
>   	ip->i_res->rs_qa_qd_num = 0;
> +	gfs2_rs_put(ip);
>   }
>   
>   static int sort_qd(const void *a, const void *b)
> @@ -843,7 +847,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
>   	unsigned int nalloc = 0, blocks;
>   	int error;
>   
> -	error = gfs2_rs_alloc(ip);
> +	error = gfs2_rs_get(ip);
>   	if (error)
>   		return error;
>   
> @@ -851,8 +855,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
>   			      &data_blocks, &ind_blocks);
>   
>   	ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
> -	if (!ghs)
> -		return -ENOMEM;
> +	if (!ghs) {
> +		error = -ENOMEM;
> +		goto out_err;
> +	}
>   
>   	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
>   	mutex_lock(&ip->i_inode.i_mutex);
> @@ -923,6 +929,8 @@ out:
>   	mutex_unlock(&ip->i_inode.i_mutex);
>   	kfree(ghs);
>   	gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
> +out_err:
> +	gfs2_rs_put(ip);
>   	return error;
>   }
>   
> @@ -1635,7 +1643,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
>   	if (error)
>   		return error;
>   
> -	error = gfs2_rs_alloc(ip);
> +	error = gfs2_rs_get(ip);
>   	if (error)
>   		goto out_put;
>   
> @@ -1705,6 +1713,7 @@ out_q:
>   	gfs2_glock_dq_uninit(&q_gh);
>   out_unlockput:
>   	mutex_unlock(&ip->i_inode.i_mutex);
> +	gfs2_rs_put(ip);
>   out_put:
>   	qd_put(qd);
>   	return error;
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index c92ae7fd..89e9b66 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -596,24 +596,27 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
>   }
>   
>   /**
> - * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
> + * gfs2_rs_get - make sure we have a reservation assigned to the inode
>    * @ip: the inode for this reservation
>    */
> -int gfs2_rs_alloc(struct gfs2_inode *ip)
> +int gfs2_rs_get(struct gfs2_inode *ip)
>   {
>   	int error = 0;
>   
>   	down_write(&ip->i_rw_mutex);
>   	if (ip->i_res)
> -		goto out;
> +		goto out_incr;
>   
>   	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
>   	if (!ip->i_res) {
>   		error = -ENOMEM;
>   		goto out;
>   	}
> +	atomic_set(&ip->i_res->rs_ref, 0);
>   
>   	RB_CLEAR_NODE(&ip->i_res->rs_node);
> +out_incr:
> +	atomic_inc(&ip->i_res->rs_ref);
>   out:
>   	up_write(&ip->i_rw_mutex);
>   	return error;
> @@ -621,10 +624,11 @@ out:
>   
>   static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
>   {
> -	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
> +	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u r:%u\n",
>   		       (unsigned long long)rs->rs_inum,
>   		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
> -		       rs->rs_rbm.offset, rs->rs_free);
> +		       rs->rs_rbm.offset, rs->rs_free,
> +		       atomic_read(&rs->rs_ref));
>   }
>   
>   /**
> @@ -678,15 +682,15 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
>   }
>   
>   /**
> - * gfs2_rs_delete - delete a multi-block reservation
> + * gfs2_rs_put - put a multi-block reservation
>    * @ip: The inode for this reservation
>    * @wcount: The inode's write count, or NULL
>    *
>    */
> -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
> +void gfs2_rs_put(struct gfs2_inode *ip)
>   {
>   	down_write(&ip->i_rw_mutex);
> -	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
> +	if (ip->i_res && atomic_dec_and_test(&ip->i_res->rs_ref)) {
>   		gfs2_rs_deltree(ip->i_res);
>   		BUG_ON(ip->i_res->rs_free);
>   		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
> diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
> index c0ab33f..51cce86 100644
> --- a/fs/gfs2/rgrp.h
> +++ b/fs/gfs2/rgrp.h
> @@ -49,9 +49,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
>   extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
>   			     bool dinode, u64 *generation);
>   
> -extern int gfs2_rs_alloc(struct gfs2_inode *ip);
> +extern int gfs2_rs_get(struct gfs2_inode *ip);
>   extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
> -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
> +extern void gfs2_rs_put(struct gfs2_inode *ip);
>   extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
>   extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
>   extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index 2982445..b4d12a1 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -1607,7 +1607,8 @@ out_unlock:
>   out:
>   	/* Case 3 starts here */
>   	truncate_inode_pages_final(&inode->i_data);
> -	gfs2_rs_delete(ip, NULL);
> +	gfs2_rs_put(ip);
> +	BUG_ON(ip->i_res);
>   	gfs2_ordered_del_inode(ip);
>   	clear_inode(inode);
>   	gfs2_dir_hash_inval(ip);
> diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
> index fff47d0..811a809 100644
> --- a/fs/gfs2/trace_gfs2.h
> +++ b/fs/gfs2/trace_gfs2.h
> @@ -32,13 +32,13 @@
>   			    { GFS2_BLKST_DINODE, "dinode" },	\
>   			    { GFS2_BLKST_UNLINKED, "unlinked" })
>   
> -#define TRACE_RS_DELETE  0
> +#define TRACE_RS_PUT     0
>   #define TRACE_RS_TREEDEL 1
>   #define TRACE_RS_INSERT  2
>   #define TRACE_RS_CLAIM   3
>   
>   #define rs_func_name(x) __print_symbolic(x,	\
> -					 { 0, "del " },	\
> +					 { 0, "put " },	\
>   					 { 1, "tdel" },	\
>   					 { 2, "ins " },	\
>   					 { 3, "clm " })
> @@ -524,6 +524,7 @@ TRACE_EVENT(gfs2_rs,
>   		__field(	u64,	inum			)
>   		__field(	u64,	start			)
>   		__field(	u32,	free			)
> +		__field(	u32,	ref			)
>   		__field(	u8,	func			)
>   	),
>   
> @@ -535,17 +536,20 @@ TRACE_EVENT(gfs2_rs,
>   		__entry->inum		= rs->rs_inum;
>   		__entry->start		= gfs2_rbm_to_block(&rs->rs_rbm);
>   		__entry->free		= rs->rs_free;
> +		__entry->ref		= atomic_read(&rs->rs_ref);
>   		__entry->func		= func;
>   	),
>   
> -	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
> +	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu "
> +		  "r:%lu",
>   		  MAJOR(__entry->dev), MINOR(__entry->dev),
>   		  (unsigned long long)__entry->inum,
>   		  (unsigned long long)__entry->start,
>   		  (unsigned long long)__entry->rd_addr,
>   		  (unsigned long)__entry->rd_free_clone,
>   		  (unsigned long)__entry->rd_reserved,
> -		  rs_func_name(__entry->func), (unsigned long)__entry->free)
> +		  rs_func_name(__entry->func), (unsigned long)__entry->free,
> +		  (unsigned long)__entry->ref)
>   );
>   
>   #endif /* _TRACE_GFS2_H */
>



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Cluster-devel] [GFS2 PATCH] GFS2: Make rgrp reservations part of the gfs2_inode structure
  2015-07-16  8:56   ` Steven Whitehouse
@ 2015-07-17 13:21     ` Bob Peterson
  0 siblings, 0 replies; 3+ messages in thread
From: Bob Peterson @ 2015-07-17 13:21 UTC (permalink / raw)
  To: cluster-devel.redhat.com

----- Original Message -----
> Hi,
> 
> On 15/07/15 16:02, Bob Peterson wrote:
> > Hi,
> >
> > This patch changes the scheme for multi-block reservations so that
> > it uses a reference count rather than relying upon the last closer
> > to delete it. This is necessary because multiple operations can
> > require a reservation structure that don't involved file open/close,
> > i.e. inode operations. We need to ensure that the last file closer
> > doesn't delete the reservation structure while it's being used by
> > an inode operation.
> >
> > Regards,
> >
> > Bob Peterson
> > Red Hat File Systems
> Having to add a reference count does seem a bit heavy weight here. We
> could go back to the scheme of simply including the reservation
> structure in the inode to make it simpler, at the expense of more memory
> usage for inodes that are not actively being written to. Either way I'd
> prefer to avoid using an atomic ref count that has to be tweeked on
> every write, since that will likely be an issue with multi-threaded
> workloads, which was why we avoided going down that route in the first
> place,
> 
> Steve.

Hi,

I like the idea of including the reservation structure into the inode.
It saves us a lot of headaches with slab allocations, simplifies a lot of
code, and ensures that the structure will always be there when we need it.
The following is a revised patch for this.

Regards,

Bob Peterson
Red Hat File Systems

Patch description:

Before this patch, multi-block reservation structures were allocated
from a special slab. This patch folds the structure into the gfs2_inode
structure. The disadvantage is that the gfs2_inode needs more memory,
even when a file is opened read-only. The advantages are: (a) we don't
need the special slab and the extra time it takes to allocate and
deallocate from it. (b) we no longer need to worry that the structure
exists for things like quota management. (c) This also allows us to
remove the calls to get_write_access and put_write_access since we
know the structure will exist.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1caee05..c2267bc 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -914,7 +914,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 failed:
 	gfs2_trans_end(sdp);
 	gfs2_inplace_release(ip);
-	if (ip->i_res->rs_qa_qd_num)
+	if (ip->i_res.rs_qa_qd_num)
 		gfs2_quota_unlock(ip);
 	if (inode == sdp->sd_rindex) {
 		gfs2_glock_dq(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 61296ec..917718e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -787,8 +787,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	if (error)
 		goto out_rlist;
 
-	if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
-		gfs2_rs_deltree(ip->i_res);
+	if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
+		gfs2_rs_deltree(&ip->i_res);
 
 	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
 				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
@@ -1291,26 +1291,17 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 	if (ret)
 		return ret;
 
-	ret = get_write_access(inode);
-	if (ret)
-		return ret;
-
 	inode_dio_wait(inode);
 
-	ret = gfs2_rs_alloc(ip);
-	if (ret)
-		goto out;
-
 	oldsize = inode->i_size;
 	if (newsize >= oldsize) {
 		ret = do_grow(inode, newsize);
 		goto out;
 	}
 
-	gfs2_rs_deltree(ip->i_res);
+	gfs2_rs_deltree(&ip->i_res);
 	ret = do_shrink(inode, oldsize, newsize);
 out:
-	put_write_access(inode);
 	return ret;
 }
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index cf4ab89..026be77 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -336,8 +336,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
 	size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
 	int hint = min_t(size_t, INT_MAX, blks);
 
-	if (hint > atomic_read(&ip->i_res->rs_sizehint))
-		atomic_set(&ip->i_res->rs_sizehint, hint);
+	if (hint > atomic_read(&ip->i_res.rs_sizehint))
+		atomic_set(&ip->i_res.rs_sizehint, hint);
 }
 
 /**
@@ -397,14 +397,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* Update file times before taking page lock */
 	file_update_time(vma->vm_file);
 
-	ret = get_write_access(inode);
-	if (ret)
-		goto out;
-
-	ret = gfs2_rs_alloc(ip);
-	if (ret)
-		goto out_write_access;
-
 	gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -486,9 +478,6 @@ out_uninit:
 		set_page_dirty(page);
 		wait_for_stable_page(page);
 	}
-out_write_access:
-	put_write_access(inode);
-out:
 	sb_end_pagefault(inode->i_sb);
 	return block_page_mkwrite_return(ret);
 }
@@ -703,10 +692,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct gfs2_inode *ip = GFS2_I(file_inode(file));
 	int ret;
 
-	ret = gfs2_rs_alloc(ip);
-	if (ret)
-		return ret;
-
 	gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
 
 	if (iocb->ki_flags & IOCB_APPEND) {
@@ -934,19 +919,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
 			goto out_unlock;
 	}
 
-	ret = get_write_access(inode);
-	if (ret)
-		goto out_unlock;
-
-	ret = gfs2_rs_alloc(ip);
-	if (ret)
-		goto out_putw;
-
 	ret = __gfs2_fallocate(file, mode, offset, len);
 	if (ret)
-		gfs2_rs_deltree(ip->i_res);
-out_putw:
-	put_write_access(inode);
+		gfs2_rs_deltree(&ip->i_res);
 out_unlock:
 	gfs2_glock_dq(&gh);
 out_uninit:
@@ -959,13 +934,6 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
 				      struct file *out, loff_t *ppos,
 				      size_t len, unsigned int flags)
 {
-	int error;
-	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
-
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		return (ssize_t)error;
-
 	gfs2_size_hint(out, *ppos, len);
 
 	return iter_file_splice_write(pipe, out, ppos, len, flags);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e300f74..bf6c45d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -392,7 +392,7 @@ struct gfs2_inode {
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
 	struct gfs2_holder i_gh; /* for prepare/commit_write only */
-	struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
+	struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */
 	struct gfs2_rgrpd *i_rgd;
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 063fdfc..d4f382f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -601,10 +601,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
-	error = gfs2_rs_alloc(dip);
-	if (error)
-		return error;
-
 	error = gfs2_rindex_update(sdp);
 	if (error)
 		return error;
@@ -653,10 +649,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		goto fail_free_vfs_inode;
 
 	ip = GFS2_I(inode);
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		goto fail_free_acls;
-
 	inode->i_mode = mode;
 	set_nlink(inode, S_ISDIR(mode) ? 2 : 1);
 	inode->i_rdev = dev;
@@ -777,7 +769,6 @@ fail_free_inode:
 	if (ip->i_gl)
 		gfs2_glock_put(ip->i_gl);
 	gfs2_rs_delete(ip, NULL);
-fail_free_acls:
 	if (default_acl)
 		posix_acl_release(default_acl);
 	if (acl)
@@ -898,10 +889,6 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	error = gfs2_rs_alloc(dip);
-	if (error)
-		return error;
-
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
@@ -1371,10 +1358,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (error)
 		return error;
 
-	error = gfs2_rs_alloc(ndip);
-	if (error)
-		return error;
-
 	if (odip != ndip) {
 		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
 					   0, &r_gh);
@@ -1854,14 +1837,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
 		ogid = ngid = NO_GID_QUOTA_CHANGE;
 
-	error = get_write_access(inode);
-	if (error)
-		return error;
-
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		goto out;
-
 	error = gfs2_rindex_update(sdp);
 	if (error)
 		goto out;
@@ -1898,7 +1873,6 @@ out_end_trans:
 out_gunlock_q:
 	gfs2_quota_unlock(ip);
 out:
-	put_write_access(inode);
 	return error;
 }
 
@@ -1920,10 +1894,6 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct gfs2_holder i_gh;
 	int error;
 
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		return error;
-
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
 		return error;
@@ -2002,9 +1972,7 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret == 0) {
-		ret = gfs2_rs_alloc(ip);
-		if (ret == 0)
-			ret = generic_setxattr(dentry, name, data, size, flags);
+		ret = generic_setxattr(dentry, name, data, size, flags);
 		gfs2_glock_dq(&gh);
 	}
 	gfs2_holder_uninit(&gh);
@@ -2043,9 +2011,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret == 0) {
-		ret = gfs2_rs_alloc(ip);
-		if (ret == 0)
-			ret = generic_removexattr(dentry, name);
+		ret = generic_removexattr(dentry, name);
 		gfs2_glock_dq(&gh);
 	}
 	gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 241a399..e9b3071 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -41,7 +41,8 @@ static void gfs2_init_inode_once(void *foo)
 	inode_init_once(&ip->i_inode);
 	init_rwsem(&ip->i_rw_mutex);
 	INIT_LIST_HEAD(&ip->i_trunc_list);
-	ip->i_res = NULL;
+	memset(&ip->i_res, 0, sizeof(ip->i_res));
+	RB_CLEAR_NODE(&ip->i_res.rs_node);
 	ip->i_hash_cache = NULL;
 }
 
@@ -135,12 +136,6 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_quotad_cachep)
 		goto fail;
 
-	gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk",
-					     sizeof(struct gfs2_blkreserv),
-					       0, 0, NULL);
-	if (!gfs2_rsrv_cachep)
-		goto fail;
-
 	register_shrinker(&gfs2_qd_shrinker);
 
 	error = register_filesystem(&gfs2_fs_type);
@@ -193,9 +188,6 @@ fail_lru:
 	unregister_shrinker(&gfs2_qd_shrinker);
 	gfs2_glock_exit();
 
-	if (gfs2_rsrv_cachep)
-		kmem_cache_destroy(gfs2_rsrv_cachep);
-
 	if (gfs2_quotad_cachep)
 		kmem_cache_destroy(gfs2_quotad_cachep);
 
@@ -238,7 +230,6 @@ static void __exit exit_gfs2_fs(void)
 	rcu_barrier();
 
 	mempool_destroy(gfs2_page_pool);
-	kmem_cache_destroy(gfs2_rsrv_cachep);
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
 	kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3a31226..a49176d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -533,15 +533,9 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 	struct gfs2_quota_data **qd;
 	int error;
 
-	if (ip->i_res == NULL) {
-		error = gfs2_rs_alloc(ip);
-		if (error)
-			return error;
-	}
+	qd = ip->i_res.rs_qa_qd;
 
-	qd = ip->i_res->rs_qa_qd;
-
-	if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
+	if (gfs2_assert_warn(sdp, !ip->i_res.rs_qa_qd_num) ||
 	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
 		return -EIO;
 
@@ -551,13 +545,13 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 	error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
 	if (error)
 		goto out;
-	ip->i_res->rs_qa_qd_num++;
+	ip->i_res.rs_qa_qd_num++;
 	qd++;
 
 	error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
 	if (error)
 		goto out;
-	ip->i_res->rs_qa_qd_num++;
+	ip->i_res.rs_qa_qd_num++;
 	qd++;
 
 	if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) &&
@@ -565,7 +559,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 		error = qdsb_get(sdp, make_kqid_uid(uid), qd);
 		if (error)
 			goto out;
-		ip->i_res->rs_qa_qd_num++;
+		ip->i_res.rs_qa_qd_num++;
 		qd++;
 	}
 
@@ -574,7 +568,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 		error = qdsb_get(sdp, make_kqid_gid(gid), qd);
 		if (error)
 			goto out;
-		ip->i_res->rs_qa_qd_num++;
+		ip->i_res.rs_qa_qd_num++;
 		qd++;
 	}
 
@@ -589,15 +583,13 @@ void gfs2_quota_unhold(struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	unsigned int x;
 
-	if (ip->i_res == NULL)
-		return;
 	gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
 
-	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-		qdsb_put(ip->i_res->rs_qa_qd[x]);
-		ip->i_res->rs_qa_qd[x] = NULL;
+	for (x = 0; x < ip->i_res.rs_qa_qd_num; x++) {
+		qdsb_put(ip->i_res.rs_qa_qd[x]);
+		ip->i_res.rs_qa_qd[x] = NULL;
 	}
-	ip->i_res->rs_qa_qd_num = 0;
+	ip->i_res.rs_qa_qd_num = 0;
 }
 
 static int sort_qd(const void *a, const void *b)
@@ -843,10 +835,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	unsigned int nalloc = 0, blocks;
 	int error;
 
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		return error;
-
 	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
 			      &data_blocks, &ind_blocks);
 
@@ -1014,12 +1002,12 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
 		return 0;
 
-	sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
+	sort(ip->i_res.rs_qa_qd, ip->i_res.rs_qa_qd_num,
 	     sizeof(struct gfs2_quota_data *), sort_qd, NULL);
 
-	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-		qd = ip->i_res->rs_qa_qd[x];
-		error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
+	for (x = 0; x < ip->i_res.rs_qa_qd_num; x++) {
+		qd = ip->i_res.rs_qa_qd[x];
+		error = do_glock(qd, NO_FORCE, &ip->i_res.rs_qa_qd_ghs[x]);
 		if (error)
 			break;
 	}
@@ -1028,7 +1016,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 		set_bit(GIF_QD_LOCKED, &ip->i_flags);
 	else {
 		while (x--)
-			gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
+			gfs2_glock_dq_uninit(&ip->i_res.rs_qa_qd_ghs[x]);
 		gfs2_quota_unhold(ip);
 	}
 
@@ -1082,14 +1070,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
 	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
 		goto out;
 
-	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
+	for (x = 0; x < ip->i_res.rs_qa_qd_num; x++) {
 		struct gfs2_quota_data *qd;
 		int sync;
 
-		qd = ip->i_res->rs_qa_qd[x];
+		qd = ip->i_res.rs_qa_qd[x];
 		sync = need_sync(qd);
 
-		gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
+		gfs2_glock_dq_uninit(&ip->i_res.rs_qa_qd_ghs[x]);
 		if (!sync)
 			continue;
 
@@ -1168,8 +1156,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
         if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                 return 0;
 
-	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-		qd = ip->i_res->rs_qa_qd[x];
+	for (x = 0; x < ip->i_res.rs_qa_qd_num; x++) {
+		qd = ip->i_res.rs_qa_qd[x];
 
 		if (!(qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
 		      qid_eq(qd->qd_id, make_kqid_gid(gid))))
@@ -1223,8 +1211,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 	if (ip->i_diskflags & GFS2_DIF_SYSTEM)
 		return;
 
-	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-		qd = ip->i_res->rs_qa_qd[x];
+	for (x = 0; x < ip->i_res.rs_qa_qd_num; x++) {
+		qd = ip->i_res.rs_qa_qd[x];
 
 		if (qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
 		    qid_eq(qd->qd_id, make_kqid_gid(gid))) {
@@ -1635,10 +1623,6 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
 	if (error)
 		return error;
 
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		goto out_put;
-
 	mutex_lock(&ip->i_inode.i_mutex);
 	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
 	if (error)
@@ -1705,7 +1689,6 @@ out_q:
 	gfs2_glock_dq_uninit(&q_gh);
 out_unlockput:
 	mutex_unlock(&ip->i_inode.i_mutex);
-out_put:
 	qd_put(qd);
 	return error;
 }
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c92ae7fd..3f70318 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -595,30 +595,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 	}
 }
 
-/**
- * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
- * @ip: the inode for this reservation
- */
-int gfs2_rs_alloc(struct gfs2_inode *ip)
-{
-	int error = 0;
-
-	down_write(&ip->i_rw_mutex);
-	if (ip->i_res)
-		goto out;
-
-	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
-	if (!ip->i_res) {
-		error = -ENOMEM;
-		goto out;
-	}
-
-	RB_CLEAR_NODE(&ip->i_res->rs_node);
-out:
-	up_write(&ip->i_rw_mutex);
-	return error;
-}
-
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 {
 	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
@@ -686,11 +662,9 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
 {
 	down_write(&ip->i_rw_mutex);
-	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
-		gfs2_rs_deltree(ip->i_res);
-		BUG_ON(ip->i_res->rs_free);
-		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
-		ip->i_res = NULL;
+	if ((wcount == NULL) || (atomic_read(wcount) <= 1)) {
+		gfs2_rs_deltree(&ip->i_res);
+		BUG_ON(ip->i_res.rs_free);
 	}
 	up_write(&ip->i_rw_mutex);
 }
@@ -1455,7 +1429,7 @@ static void rs_insert(struct gfs2_inode *ip)
 {
 	struct rb_node **newn, *parent = NULL;
 	int rc;
-	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_blkreserv *rs = &ip->i_res;
 	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
 	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
 
@@ -1502,7 +1476,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
 {
 	struct gfs2_rbm rbm = { .rgd = rgd, };
 	u64 goal;
-	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_blkreserv *rs = &ip->i_res;
 	u32 extlen;
 	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
 	int ret;
@@ -1573,7 +1547,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
 	}
 
 	if (n) {
-		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
+		while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
 			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
 			n = n->rb_right;
 			if (n == NULL)
@@ -1983,7 +1957,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *begin = NULL;
-	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_blkreserv *rs = &ip->i_res;
 	int error = 0, rg_locked, flags = 0;
 	u64 last_unlinked = NO_BLOCK;
 	int loops = 0;
@@ -2112,7 +2086,7 @@ next_rgrp:
 
 void gfs2_inplace_release(struct gfs2_inode *ip)
 {
-	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_blkreserv *rs = &ip->i_res;
 
 	if (rs->rs_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
@@ -2266,7 +2240,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 				    const struct gfs2_rbm *rbm, unsigned len)
 {
-	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_blkreserv *rs = &ip->i_res;
 	struct gfs2_rgrpd *rgd = rbm->rgd;
 	unsigned rlen;
 	u64 block;
@@ -2309,8 +2283,8 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
 {
 	u64 goal;
 
-	if (gfs2_rs_active(ip->i_res)) {
-		*rbm = ip->i_res->rs_rbm;
+	if (gfs2_rs_active(&ip->i_res)) {
+		*rbm = ip->i_res.rs_rbm;
 		return;
 	}
 
@@ -2364,7 +2338,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	gfs2_alloc_extent(&rbm, dinode, nblocks);
 	block = gfs2_rbm_to_block(&rbm);
 	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
-	if (gfs2_rs_active(ip->i_res))
+	if (gfs2_rs_active(&ip->i_res))
 		gfs2_adjust_reservation(ip, &rbm, *nblocks);
 	ndata = *nblocks;
 	if (dinode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index c0ab33f..e87f076 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -49,7 +49,6 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
-extern int gfs2_rs_alloc(struct gfs2_inode *ip);
 extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
 extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
@@ -78,7 +77,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
 /* This is how to tell if a reservation is in the rgrp tree: */
-static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
+static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs)
 {
 	return rs && !RB_EMPTY_NODE(&rs->rs_node);
 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2982445..8270aba 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1593,8 +1593,8 @@ out_truncate:
 
 out_unlock:
 	/* Error path for case 1 */
-	if (gfs2_rs_active(ip->i_res))
-		gfs2_rs_deltree(ip->i_res);
+	if (gfs2_rs_active(&ip->i_res))
+		gfs2_rs_deltree(&ip->i_res);
 
 	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
 		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
@@ -1632,7 +1632,8 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 		ip->i_flags = 0;
 		ip->i_gl = NULL;
 		ip->i_rgd = NULL;
-		ip->i_res = NULL;
+		memset(&ip->i_res, 0, sizeof(ip->i_res));
+		RB_CLEAR_NODE(&ip->i_res.rs_node);
 	}
 	return &ip->i_inode;
 }
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 86d2035..ad7fad0 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -27,7 +27,6 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
 struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
 struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
 struct kmem_cache *gfs2_quotad_cachep __read_mostly;
-struct kmem_cache *gfs2_rsrv_cachep __read_mostly;
 mempool_t *gfs2_page_pool __read_mostly;
 
 void gfs2_assert_i(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index cbdcbdf..cb90029 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -149,7 +149,6 @@ extern struct kmem_cache *gfs2_inode_cachep;
 extern struct kmem_cache *gfs2_bufdata_cachep;
 extern struct kmem_cache *gfs2_rgrpd_cachep;
 extern struct kmem_cache *gfs2_quotad_cachep;
-extern struct kmem_cache *gfs2_rsrv_cachep;
 extern mempool_t *gfs2_page_pool;
 
 static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-07-17 13:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <970678731.38907530.1436972414921.JavaMail.zimbra@redhat.com>
2015-07-15 15:02 ` [Cluster-devel] [GFS2 PATCH] GFS2: Use reference count for multi-block reservations Bob Peterson
2015-07-16  8:56   ` Steven Whitehouse
2015-07-17 13:21     ` [Cluster-devel] [GFS2 PATCH] GFS2: Make rgrp reservations part of the gfs2_inode structure Bob Peterson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.