[Cluster-devel] GFS2: Pre-pull patch posting (merge window)

cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2011-03-15  9:11 Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 01/15] GFS2: Use RCU for glock hash table Steven Whitehouse
                   ` (14 more replies)
  0 siblings, 15 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

The most interesting "feature" in this patch set is the RCU glock
patch which has been a long time coming, but is finally here. That
patch contains most of the changes this time. The other patches in
this set are mostly smaller bug fixes and performance improvements.

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 01/15] GFS2: Use RCU for glock hash table
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 02/15] GFS2: Post-VFS scale update for RCU path walk Steven Whitehouse
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This has a number of advantages:

 - Reduces contention on the hash table lock
 - Makes the code smaller and simpler
 - Should speed up glock dumps when under load
 - Removes ref count changing in examine_bucket
 - No longer need hash chain lock in glock_put() in common case

There are some further changes which this enables and which
we may do in the future. One is to look at using SLAB_RCU,
and another is to look at using a per-cpu counter for the
per-sb glock counter, since that is touched twice in the
lifetime of each glock (but only used at umount time).

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb..c75d499 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
 #include <linux/freezer.h>
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
+#include <linux/bit_spinlock.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -41,10 +44,6 @@
 #define CREATE_TRACE_POINTS
 #include "trace_gfs2.h"
 
-struct gfs2_gl_hash_bucket {
-        struct hlist_head hb_list;
-};
-
 struct gfs2_glock_iter {
 	int hash;			/* hash bucket index         */
 	struct gfs2_sbd *sdp;		/* incore superblock         */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
 static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
 #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
 #define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
 #define GFS2_GL_HASH_MASK       (GFS2_GL_HASH_SIZE - 1)
 
-static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
+static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
 static struct dentry *gfs2_root;
 
-/*
- * Despite what you might think, the numbers below are not arbitrary :-)
- * They are taken from the ipv4 routing hash code, which is well tested
- * and thus should be nearly optimal. Later on we might tweek the numbers
- * but for now this should be fine.
- *
- * The reason for putting the locks in a separate array from the list heads
- * is that we can have fewer locks than list heads and save memory. We use
- * the same hash function for both, but with a different hash mask.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
-	defined(CONFIG_PROVE_LOCKING)
-
-#ifdef CONFIG_LOCKDEP
-# define GL_HASH_LOCK_SZ        256
-#else
-# if NR_CPUS >= 32
-#  define GL_HASH_LOCK_SZ       4096
-# elif NR_CPUS >= 16
-#  define GL_HASH_LOCK_SZ       2048
-# elif NR_CPUS >= 8
-#  define GL_HASH_LOCK_SZ       1024
-# elif NR_CPUS >= 4
-#  define GL_HASH_LOCK_SZ       512
-# else
-#  define GL_HASH_LOCK_SZ       256
-# endif
-#endif
-
-/* We never want more locks than chains */
-#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
-# undef GL_HASH_LOCK_SZ
-# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
-#endif
-
-static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
-
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
-	return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
-}
-#else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
-	return NULL;
-}
-#endif
-
 /**
  * gl_hash() - Turn glock number into hash bucket number
  * @lock: The glock number
@@ -141,25 +91,30 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
 	return h;
 }
 
-/**
- * glock_free() - Perform a few checks and then release struct gfs2_glock
- * @gl: The glock to release
- *
- * Also calls lock module to release its internal structure for this glock.
- *
- */
+static inline void spin_lock_bucket(unsigned int hash)
+{
+	struct hlist_bl_head *bl = &gl_hash_table[hash];
+	bit_spin_lock(0, (unsigned long *)bl);
+}
+
+static inline void spin_unlock_bucket(unsigned int hash)
+{
+	struct hlist_bl_head *bl = &gl_hash_table[hash];
+	__bit_spin_unlock(0, (unsigned long *)bl);
+}
 
-static void glock_free(struct gfs2_glock *gl)
+void gfs2_glock_free(struct rcu_head *rcu)
 {
+	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	struct address_space *mapping = gfs2_glock2aspace(gl);
-	struct kmem_cache *cachep = gfs2_glock_cachep;
 
-	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
-	trace_gfs2_glock_put(gl);
-	if (mapping)
-		cachep = gfs2_glock_aspace_cachep;
-	sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
+	if (gl->gl_ops->go_flags & GLOF_ASPACE)
+		kmem_cache_free(gfs2_glock_aspace_cachep, gl);
+	else
+		kmem_cache_free(gfs2_glock_cachep, gl);
+
+	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+		wake_up(&sdp->sd_glock_wait);
 }
 
 /**
@@ -185,34 +140,49 @@ static int demote_ok(const struct gfs2_glock *gl)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 
+	/* assert_spin_locked(&gl->gl_spin); */
+
 	if (gl->gl_state == LM_ST_UNLOCKED)
 		return 0;
-	if (!list_empty(&gl->gl_holders))
+	if (test_bit(GLF_LFLUSH, &gl->gl_flags))
+		return 0;
+	if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
+	    !list_empty(&gl->gl_holders))
 		return 0;
 	if (glops->go_demote_ok)
 		return glops->go_demote_ok(gl);
 	return 1;
 }
 
+
 /**
- * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
  * @gl: the glock
  *
+ * If the glock is demotable, then we add it (or move it) to the end
+ * of the glock LRU list.
  */
 
-static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
 {
-	int may_reclaim;
-	may_reclaim = (demote_ok(gl) &&
-		       (atomic_read(&gl->gl_ref) == 1 ||
-			(gl->gl_name.ln_type == LM_TYPE_INODE &&
-			 atomic_read(&gl->gl_ref) <= 2)));
-	spin_lock(&lru_lock);
-	if (list_empty(&gl->gl_lru) && may_reclaim) {
+	if (demote_ok(gl)) {
+		spin_lock(&lru_lock);
+
+		if (!list_empty(&gl->gl_lru))
+			list_del_init(&gl->gl_lru);
+		else
+			atomic_inc(&lru_count);
+
 		list_add_tail(&gl->gl_lru, &lru_list);
-		atomic_inc(&lru_count);
+		spin_unlock(&lru_lock);
 	}
-	spin_unlock(&lru_lock);
+}
+
+void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+{
+	spin_lock(&gl->gl_spin);
+	__gfs2_glock_schedule_for_reclaim(gl);
+	spin_unlock(&gl->gl_spin);
 }
 
 /**
@@ -227,7 +197,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
 {
 	if (atomic_dec_and_test(&gl->gl_ref))
 		GLOCK_BUG_ON(gl, 1);
-	gfs2_glock_schedule_for_reclaim(gl);
 }
 
 /**
@@ -236,30 +205,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
  *
  */
 
-int gfs2_glock_put(struct gfs2_glock *gl)
+void gfs2_glock_put(struct gfs2_glock *gl)
 {
-	int rv = 0;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct address_space *mapping = gfs2_glock2aspace(gl);
 
-	write_lock(gl_lock_addr(gl->gl_hash));
-	if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
-		hlist_del(&gl->gl_list);
+	if (atomic_dec_and_test(&gl->gl_ref)) {
+		spin_lock_bucket(gl->gl_hash);
+		hlist_bl_del_rcu(&gl->gl_list);
+		spin_unlock_bucket(gl->gl_hash);
+		spin_lock(&lru_lock);
 		if (!list_empty(&gl->gl_lru)) {
 			list_del_init(&gl->gl_lru);
 			atomic_dec(&lru_count);
 		}
 		spin_unlock(&lru_lock);
-		write_unlock(gl_lock_addr(gl->gl_hash));
 		GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
-		glock_free(gl);
-		rv = 1;
-		goto out;
+		GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+		trace_gfs2_glock_put(gl);
+		sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
 	}
-	spin_lock(&gl->gl_spin);
-	gfs2_glock_schedule_for_reclaim(gl);
-	spin_unlock(&gl->gl_spin);
-	write_unlock(gl_lock_addr(gl->gl_hash));
-out:
-	return rv;
 }
 
 /**
@@ -275,17 +240,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
 					const struct lm_lockname *name)
 {
 	struct gfs2_glock *gl;
-	struct hlist_node *h;
+	struct hlist_bl_node *h;
 
-	hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
+	hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
 		if (!lm_name_equal(&gl->gl_name, name))
 			continue;
 		if (gl->gl_sbd != sdp)
 			continue;
-
-		atomic_inc(&gl->gl_ref);
-
-		return gl;
+		if (atomic_inc_not_zero(&gl->gl_ref))
+			return gl;
 	}
 
 	return NULL;
@@ -743,10 +706,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	struct gfs2_glock *gl, *tmp;
 	unsigned int hash = gl_hash(sdp, &name);
 	struct address_space *mapping;
+	struct kmem_cache *cachep;
 
-	read_lock(gl_lock_addr(hash));
+	rcu_read_lock();
 	gl = search_bucket(hash, sdp, &name);
-	read_unlock(gl_lock_addr(hash));
+	rcu_read_unlock();
 
 	*glp = gl;
 	if (gl)
@@ -755,9 +719,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 		return -ENOENT;
 
 	if (glops->go_flags & GLOF_ASPACE)
-		gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL);
+		cachep = gfs2_glock_aspace_cachep;
 	else
-		gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
+		cachep = gfs2_glock_cachep;
+	gl = kmem_cache_alloc(cachep, GFP_KERNEL);
 	if (!gl)
 		return -ENOMEM;
 
@@ -790,15 +755,15 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 		mapping->writeback_index = 0;
 	}
 
-	write_lock(gl_lock_addr(hash));
+	spin_lock_bucket(hash);
 	tmp = search_bucket(hash, sdp, &name);
 	if (tmp) {
-		write_unlock(gl_lock_addr(hash));
-		glock_free(gl);
+		spin_unlock_bucket(hash);
+		kmem_cache_free(cachep, gl);
 		gl = tmp;
 	} else {
-		hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
-		write_unlock(gl_lock_addr(hash));
+		hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
+		spin_unlock_bucket(hash);
 	}
 
 	*glp = gl;
@@ -1113,6 +1078,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
+	__gfs2_glock_schedule_for_reclaim(gl);
 	trace_gfs2_glock_queue(gh, 0);
 	spin_unlock(&gl->gl_spin);
 	if (likely(fast_path))
@@ -1440,42 +1406,30 @@ static struct shrinker glock_shrinker = {
  * @sdp: the filesystem
  * @bucket: the bucket
  *
- * Returns: 1 if the bucket has entries
  */
 
-static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
+static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
 			  unsigned int hash)
 {
-	struct gfs2_glock *gl, *prev = NULL;
-	int has_entries = 0;
-	struct hlist_head *head = &gl_hash_table[hash].hb_list;
+	struct gfs2_glock *gl;
+	struct hlist_bl_head *head = &gl_hash_table[hash];
+	struct hlist_bl_node *pos;
 
-	read_lock(gl_lock_addr(hash));
-	/* Can't use hlist_for_each_entry - don't want prefetch here */
-	if (hlist_empty(head))
-		goto out;
-	gl = list_entry(head->first, struct gfs2_glock, gl_list);
-	while(1) {
-		if (!sdp || gl->gl_sbd == sdp) {
-			gfs2_glock_hold(gl);
-			read_unlock(gl_lock_addr(hash));
-			if (prev)
-				gfs2_glock_put(prev);
-			prev = gl;
+	rcu_read_lock();
+	hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
+		if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
 			examiner(gl);
-			has_entries = 1;
-			read_lock(gl_lock_addr(hash));
-		}
-		if (gl->gl_list.next == NULL)
-			break;
-		gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
 	}
-out:
-	read_unlock(gl_lock_addr(hash));
-	if (prev)
-		gfs2_glock_put(prev);
+	rcu_read_unlock();
 	cond_resched();
-	return has_entries;
+}
+
+static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
+{
+	unsigned x;
+
+	for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
+		examine_bucket(examiner, sdp, x);
 }
 
 
@@ -1529,10 +1483,21 @@ static void clear_glock(struct gfs2_glock *gl)
 
 void gfs2_glock_thaw(struct gfs2_sbd *sdp)
 {
-	unsigned x;
+	glock_hash_walk(thaw_glock, sdp);
+}
 
-	for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
-		examine_bucket(thaw_glock, sdp, x);
+static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+{
+	int ret;
+	spin_lock(&gl->gl_spin);
+	ret = __dump_glock(seq, gl);
+	spin_unlock(&gl->gl_spin);
+	return ret;
+}
+
+static void dump_glock_func(struct gfs2_glock *gl)
+{
+	dump_glock(NULL, gl);
 }
 
 /**
@@ -1545,13 +1510,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
 
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
-	unsigned int x;
-
-	for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
-		examine_bucket(clear_glock, sdp, x);
+	glock_hash_walk(clear_glock, sdp);
 	flush_workqueue(glock_workqueue);
 	wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
-	gfs2_dump_lockstate(sdp);
+	glock_hash_walk(dump_glock_func, sdp);
 }
 
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,66 +1679,15 @@ out:
 	return error;
 }
 
-static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
-{
-	int ret;
-	spin_lock(&gl->gl_spin);
-	ret = __dump_glock(seq, gl);
-	spin_unlock(&gl->gl_spin);
-	return ret;
-}
-
-/**
- * gfs2_dump_lockstate - print out the current lockstate
- * @sdp: the filesystem
- * @ub: the buffer to copy the information into
- *
- * If @ub is NULL, dump the lockstate to the console.
- *
- */
-
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
-{
-	struct gfs2_glock *gl;
-	struct hlist_node *h;
-	unsigned int x;
-	int error = 0;
-
-	for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
-
-		read_lock(gl_lock_addr(x));
-
-		hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
-			if (gl->gl_sbd != sdp)
-				continue;
-
-			error = dump_glock(NULL, gl);
-			if (error)
-				break;
-		}
-
-		read_unlock(gl_lock_addr(x));
-
-		if (error)
-			break;
-	}
-
 
-	return error;
-}
 
 
 int __init gfs2_glock_init(void)
 {
 	unsigned i;
 	for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
-		INIT_HLIST_HEAD(&gl_hash_table[i].hb_list);
-	}
-#ifdef GL_HASH_LOCK_SZ
-	for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
-		rwlock_init(&gl_hash_locks[i]);
+		INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
 	}
-#endif
 
 	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
 					  WQ_HIGHPRI | WQ_FREEZEABLE, 0);
@@ -1802,62 +1713,54 @@ void gfs2_glock_exit(void)
 	destroy_workqueue(gfs2_delete_workqueue);
 }
 
+static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
+{
+	return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
+			      struct gfs2_glock, gl_list);
+}
+
+static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
+{
+	return hlist_bl_entry(rcu_dereference_raw(gl->gl_list.next),
+			      struct gfs2_glock, gl_list);
+}
+
 static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
 {
 	struct gfs2_glock *gl;
 
-restart:
-	read_lock(gl_lock_addr(gi->hash));
-	gl = gi->gl;
-	if (gl) {
-		gi->gl = hlist_entry(gl->gl_list.next,
-				     struct gfs2_glock, gl_list);
-	} else {
-		gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
-				     struct gfs2_glock, gl_list);
-	}
-	if (gi->gl)
-		gfs2_glock_hold(gi->gl);
-	read_unlock(gl_lock_addr(gi->hash));
-	if (gl)
-		gfs2_glock_put(gl);
-	while (gi->gl == NULL) {
-		gi->hash++;
-		if (gi->hash >= GFS2_GL_HASH_SIZE)
-			return 1;
-		read_lock(gl_lock_addr(gi->hash));
-		gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
-				     struct gfs2_glock, gl_list);
-		if (gi->gl)
-			gfs2_glock_hold(gi->gl);
-		read_unlock(gl_lock_addr(gi->hash));
-	}
-
-	if (gi->sdp != gi->gl->gl_sbd)
-		goto restart;
+	do {
+		gl = gi->gl;
+		if (gl) {
+			gi->gl = glock_hash_next(gl);
+		} else {
+			gi->gl = glock_hash_chain(gi->hash);
+		}
+		while (gi->gl == NULL) {
+			gi->hash++;
+			if (gi->hash >= GFS2_GL_HASH_SIZE) {
+				rcu_read_unlock();
+				return 1;
+			}
+			gi->gl = glock_hash_chain(gi->hash);
+		}
+	/* Skip entries for other sb and dead entries */
+	} while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
 
 	return 0;
 }
 
-static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
-{
-	if (gi->gl)
-		gfs2_glock_put(gi->gl);
-	gi->gl = NULL;
-}
-
 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct gfs2_glock_iter *gi = seq->private;
 	loff_t n = *pos;
 
 	gi->hash = 0;
+	rcu_read_lock();
 
 	do {
-		if (gfs2_glock_iter_next(gi)) {
-			gfs2_glock_iter_free(gi);
+		if (gfs2_glock_iter_next(gi))
 			return NULL;
-		}
 	} while (n--);
 
 	return gi->gl;
@@ -1870,10 +1773,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 
 	(*pos)++;
 
-	if (gfs2_glock_iter_next(gi)) {
-		gfs2_glock_iter_free(gi);
+	if (gfs2_glock_iter_next(gi))
 		return NULL;
-	}
 
 	return gi->gl;
 }
@@ -1881,7 +1782,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
 {
 	struct gfs2_glock_iter *gi = seq->private;
-	gfs2_glock_iter_free(gi);
+
+	if (gi->gl)
+		rcu_read_unlock();
+	gi->gl = NULL;
 }
 
 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851c..afa8bfe 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
 	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
  	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
-	void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
+	void (*lm_put_lock) (struct gfs2_glock *gl);
 	int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
 			unsigned int flags);
 	void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
 		   int create, struct gfs2_glock **glp);
 void gfs2_glock_hold(struct gfs2_glock *gl);
 void gfs2_glock_put_nolock(struct gfs2_glock *gl);
-int gfs2_glock_put(struct gfs2_glock *gl);
+void gfs2_glock_put(struct gfs2_glock *gl);
 void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
 		      struct gfs2_holder *gh);
 void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
 	return error;
 }
 
-/*  Lock Value Block functions  */
-
-int gfs2_lvb_hold(struct gfs2_glock *gl);
-void gfs2_lvb_unhold(struct gfs2_glock *gl);
-
-void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
-void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
-void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
-void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-
-int __init gfs2_glock_init(void);
-void gfs2_glock_exit(void);
-
-int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
-void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
-int gfs2_register_debugfs(void);
-void gfs2_unregister_debugfs(void);
+extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
+extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
+extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
+extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
+extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
+extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+extern void gfs2_glock_free(struct rcu_head *rcu);
+
+extern int __init gfs2_glock_init(void);
+extern void gfs2_glock_exit(void);
+
+extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
+extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
+extern int gfs2_register_debugfs(void);
+extern void gfs2_unregister_debugfs(void);
 
 extern const struct lm_lockops gfs2_dlm_ops;
 
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561b..ac5fac9 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -206,8 +206,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 static int inode_go_demote_ok(const struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_holder *gh;
+
 	if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
 		return 0;
+
+	if (!list_empty(&gl->gl_holders)) {
+		gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+		if (gh->gh_list.next != &gl->gl_holders)
+			return 0;
+	}
+
 	return 1;
 }
 
@@ -272,19 +281,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 }
 
 /**
- * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
-{
-	const struct address_space *mapping = (const struct address_space *)(gl + 1);
-	return !mapping->nrpages;
-}
-
-/**
  * rgrp_go_lock - operation done after an rgrp lock is locked by
  *    a first holder on this node.
  * @gl: the glock
@@ -410,7 +406,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
 const struct gfs2_glock_operations gfs2_rgrp_glops = {
 	.go_xmote_th = rgrp_go_sync,
 	.go_inval = rgrp_go_inval,
-	.go_demote_ok = rgrp_go_demote_ok,
 	.go_lock = rgrp_go_lock,
 	.go_unlock = rgrp_go_unlock,
 	.go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c..720c1e6 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
 #include <linux/workqueue.h>
 #include <linux/dlm.h>
 #include <linux/buffer_head.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
@@ -201,7 +203,7 @@ enum {
 };
 
 struct gfs2_glock {
-	struct hlist_node gl_list;
+	struct hlist_bl_node gl_list;
 	unsigned long gl_flags;		/* GLF_... */
 	struct lm_lockname gl_name;
 	atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
 	atomic_t gl_ail_count;
 	struct delayed_work gl_work;
 	struct work_struct gl_delete;
+	struct rcu_head gl_rcu;
 };
 
 #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493ae..c80485c 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
 {
 	struct gfs2_glock *gl = arg;
 	unsigned ret = gl->gl_state;
-	struct gfs2_sbd *sdp = gl->gl_sbd;
 
 	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
 
 	switch (gl->gl_lksb.sb_status) {
 	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
-		if (gl->gl_ops->go_flags & GLOF_ASPACE)
-			kmem_cache_free(gfs2_glock_aspace_cachep, gl);
-		else
-			kmem_cache_free(gfs2_glock_cachep, gl);
-		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-			wake_up(&sdp->sd_glock_wait);
+		call_rcu(&gl->gl_rcu, gfs2_glock_free);
 		return;
 	case -DLM_ECANCEL: /* Cancel while getting lock */
 		ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
 			GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
 }
 
-static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+static void gdlm_put_lock(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
 	if (gl->gl_lksb.sb_lkid == 0) {
-		kmem_cache_free(cachep, gl);
-		if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-			wake_up(&sdp->sd_glock_wait);
+		call_rcu(&gl->gl_rcu, gfs2_glock_free);
 		return;
 	}
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f82..11a73ef 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -91,7 +91,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	}
 	bd->bd_ail = ai;
 	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
-	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+	if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
+		gfs2_glock_schedule_for_reclaim(bd->bd_gl);
 	trace_gfs2_pin(bd, 0);
 	gfs2_log_unlock(sdp);
 	unlock_buffer(bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab..d850004 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
 #include <asm/atomic.h>
 
 #include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
 {
 	struct gfs2_glock *gl = foo;
 
-	INIT_HLIST_NODE(&gl->gl_list);
+	INIT_HLIST_BL_NODE(&gl->gl_list);
 	spin_lock_init(&gl->gl_spin);
 	INIT_LIST_HEAD(&gl->gl_holders);
 	INIT_LIST_HEAD(&gl->gl_lru);
@@ -198,6 +200,8 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
 
+	rcu_barrier();
+
 	kmem_cache_destroy(gfs2_quotad_cachep);
 	kmem_cache_destroy(gfs2_rgrpd_cachep);
 	kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927c..a39c103 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -928,12 +928,9 @@ static const match_table_t nolock_tokens = {
 	{ Opt_err, NULL },
 };
 
-static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+static void nolock_put_lock(struct gfs2_glock *gl)
 {
-	struct gfs2_sbd *sdp = gl->gl_sbd;
-	kmem_cache_free(cachep, gl);
-	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-		wake_up(&sdp->sd_glock_wait);
+	call_rcu(&gl->gl_rcu, gfs2_glock_free);
 }
 
 static const struct lm_lockops nolock_ops = {
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 02/15] GFS2: Post-VFS scale update for RCU path walk
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 01/15] GFS2: Use RCU for glock hash table Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 03/15] GFS2: Fix glock queue trace point Steven Whitehouse
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

We can allow a few more cases to use RCU path walking than
originally allowed. It should be possible to also enable
RCU path walking when the glock is already cached. That's
a bit more complicated though, so left for a future patch.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Nick Piggin <npiggin@gmail.com>

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7118f1a..cbc0715 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
 	struct posix_acl *acl;
 	int error;
 
-	if (flags & IPERM_FLAG_RCU)
-		return -ECHILD;
+	if (flags & IPERM_FLAG_RCU) {
+		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+			return -ECHILD;
+		return -EAGAIN;
+	}
 
 	acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
 	if (IS_ERR(acl))
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d8b26ac..09e436a 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
 
 /**
  * gfs2_permission -
- * @inode:
- * @mask:
- * @nd: passed from Linux VFS, ignored by us
+ * @inode: The inode
+ * @mask: The mask to be tested
+ * @flags: Indicates whether this is an RCU path walk or not
  *
  * This may be called from the VFS directly, or from within GFS2 with the
  * inode locked, so we look to see if the glock is already locked and only
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
 	int error;
 	int unlock = 0;
 
-	if (flags & IPERM_FLAG_RCU)
-		return -ECHILD;
 
 	ip = GFS2_I(inode);
 	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+		if (flags & IPERM_FLAG_RCU)
+			return -ECHILD;
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
 		if (error)
 			return error;
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 03/15] GFS2: Fix glock queue trace point
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 01/15] GFS2: Use RCU for glock hash table Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 02/15] GFS2: Post-VFS scale update for RCU path walk Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 04/15] GFS2: Improve cluster mmap scalability Steven Whitehouse
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Somehow this tracepoint landed up in the wrong place. This moves it
to where it should be.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c75d499..ddc3e1e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -972,13 +972,13 @@ fail:
 			insert_pt = &gh2->gh_list;
 	}
 	set_bit(GLF_QUEUED, &gl->gl_flags);
+	trace_gfs2_glock_queue(gh, 1);
 	if (likely(insert_pt == NULL)) {
 		list_add_tail(&gh->gh_list, &gl->gl_holders);
 		if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
 			goto do_cancel;
 		return;
 	}
-	trace_gfs2_glock_queue(gh, 1);
 	list_add_tail(&gh->gh_list, insert_pt);
 do_cancel:
 	gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 04/15] GFS2: Improve cluster mmap scalability
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (2 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 03/15] GFS2: Fix glock queue trace point Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 05/15] GFS2: panics on quotacheck update Steven Whitehouse
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

The mmap system call grabs a glock when an update to atime may be
required. It does this in order to ensure that the flags on the
inode are up to date, but since it will only mark atime for a future
update, an exclusive lock is not required here (one will be taken
later when the actual update is performed).

Also, the lock can be skipped when the mount is marked noatime in
addition to the original check which only looked at the noatime
flag for the inode itself.

This should increase the scalability of the mmap call when multiple
nodes are all mmapping the same file.

Reported-by: Scooter Morris <scooter@cgl.ucsf.edu>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7cfdcb9..216ad27 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 
-	if (!(file->f_flags & O_NOATIME)) {
+	if (!(file->f_flags & O_NOATIME) &&
+	    !IS_NOATIME(&ip->i_inode)) {
 		struct gfs2_holder i_gh;
 		int error;
 
-		gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
 		error = gfs2_glock_nq(&i_gh);
-		file_accessed(file);
-		if (error == 0)
-			gfs2_glock_dq_uninit(&i_gh);
+		if (error == 0) {
+			file_accessed(file);
+			gfs2_glock_dq(&i_gh);
+		}
+		gfs2_holder_uninit(&i_gh);
+		if (error)
+			return error;
 	}
 	vma->vm_ops = &gfs2_vm_ops;
 	vma->vm_flags |= VM_CAN_NONLINEAR;
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 05/15] GFS2: panics on quotacheck update
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (3 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 04/15] GFS2: Improve cluster mmap scalability Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 06/15] GFS2: deallocation performance patch Steven Whitehouse
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Abhijith Das <adas@redhat.com>

Handle block allocation for forceful unstuffing of quota dinode during quota
update using quotactl(). Also fix block reservation for special cases when
quotas cross over block boundaries and update 2 blocks instead of 1.

Signed-off-by: Abhi Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a689901..6ec964c 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1587,6 +1587,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 
 	offset = qd2offset(qd);
 	alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
+	if (gfs2_is_stuffed(ip))
+		alloc_required = 1;
 	if (alloc_required) {
 		al = gfs2_alloc_get(ip);
 		if (al == NULL)
@@ -1600,7 +1602,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 		blocks += gfs2_rg_blocks(al);
 	}
 
-	error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0);
+	/* Some quotas span block boundaries and can update two blocks,
+	   so add an extra block to the transaction to handle such quotas */
+	error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
 	if (error)
 		goto out_release;
 
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 06/15] GFS2: deallocation performance patch
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (4 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 05/15] GFS2: panics on quotacheck update Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 07/15] GFS2: quota allows exceeding hard limit Steven Whitehouse
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Bob Peterson <rpeterso@redhat.com>

This patch is a performance improvement to GFS2's dealloc code.
Rather than update the quota file and statfs file for every
single block that's stripped off in unlink function do_strip,
this patch keeps track and updates them once for every layer
that's stripped.  This is done entirely inside the existing
transaction, so there should be no risk of corruption.
The other functions that deallocate blocks will be unaffected
because they are using wrapper functions that do the same
thing that they do today.

I tested this code on my roth cluster by creating 200
files in a directory, each of which is 100MB, then on
four nodes, I simultaneously deleted the files, thus competing
for GFS2 resources (but different files).  The commands
I used were:

[root at roth-01]# time for i in `seq 1 4 200` ; do rm /mnt/gfs2/bigdir/gfs2.$i; done
[root at roth-02]# time for i in `seq 2 4 200` ; do rm /mnt/gfs2/bigdir/gfs2.$i; done
[root at roth-03]# time for i in `seq 3 4 200` ; do rm /mnt/gfs2/bigdir/gfs2.$i; done
[root at roth-05]# time for i in `seq 4 4 200` ; do rm /mnt/gfs2/bigdir/gfs2.$i; done

The performance increase was significant:

             roth-01     roth-02     roth-03     roth-05
             ---------   ---------   ---------   ---------
old: real    0m34.027s   0m25.021s   0m23.906s   0m35.646s
new: real    0m22.379s   0m24.362s   0m24.133s   0m18.562s

Total time spent deleting:
old: 118.6s
new:  89.4s

For this particular case, this showed a 25% performance increase for
GFS2 unlinks.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c4039d..ef3dc4b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -21,6 +21,7 @@
 #include "meta_io.h"
 #include "quota.h"
 #include "rgrp.h"
+#include "super.h"
 #include "trans.h"
 #include "dir.h"
 #include "util.h"
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrp_list rlist;
 	u64 bn, bstart;
-	u32 blen;
+	u32 blen, btotal;
 	__be64 *p;
 	unsigned int rg_blocks = 0;
 	int metadata;
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 
 	bstart = 0;
 	blen = 0;
+	btotal = 0;
 
 	for (p = top; p < bottom; p++) {
 		if (!*p)
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 		else {
 			if (bstart) {
 				if (metadata)
-					gfs2_free_meta(ip, bstart, blen);
+					__gfs2_free_meta(ip, bstart, blen);
 				else
-					gfs2_free_data(ip, bstart, blen);
+					__gfs2_free_data(ip, bstart, blen);
+
+				btotal += blen;
 			}
 
 			bstart = bn;
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	}
 	if (bstart) {
 		if (metadata)
-			gfs2_free_meta(ip, bstart, blen);
+			__gfs2_free_meta(ip, bstart, blen);
 		else
-			gfs2_free_data(ip, bstart, blen);
+			__gfs2_free_data(ip, bstart, blen);
+
+		btotal += blen;
 	}
 
+	gfs2_statfs_change(sdp, 0, +btotal, 0);
+	gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
+			  ip->i_inode.i_gid);
+
 	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
 	gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7293ea2..cf930cd 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1602,7 +1602,7 @@ rgrp_error:
  *
  */
 
-void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd;
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 
 	gfs2_trans_add_rg(rgd);
+}
 
+/**
+ * gfs2_free_data - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+	__gfs2_free_data(ip, bstart, blen);
 	gfs2_statfs_change(sdp, 0, +blen, 0);
 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
 }
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
  *
  */
 
-void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd;
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 
 	gfs2_trans_add_rg(rgd);
+	gfs2_meta_wipe(ip, bstart, blen);
+}
 
+/**
+ * gfs2_free_meta - free a contiguous run of metadata block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+	__gfs2_free_meta(ip, bstart, blen);
 	gfs2_statfs_change(sdp, 0, +blen, 0);
 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
-	gfs2_meta_wipe(ip, bstart, blen);
 }
 
 void gfs2_unlink_di(struct inode *inode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 50c2bb0..a80e303 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
 extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
 extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
 
+extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
 extern void gfs2_unlink_di(struct inode *inode);
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 07/15] GFS2: quota allows exceeding hard limit
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (5 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 06/15] GFS2: deallocation performance patch Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 08/15] GFS2: Fix glock deallocation race Steven Whitehouse
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Abhijith Das <adas@redhat.com>

Immediately after being synced to disk, cached quotas are zeroed out and a
subsequent access of the cached quotas results in incorrect zero values. This
meant that gfs2 assumed the actual usage to be the zero (or near-zero) usage
values it found in the cached quotas and comparison against warn/limits never
triggered a quota violation.

This patch adds a new flag QDF_REFRESH that is set after a sync so that the
cached quotas are forcefully refreshed from disk on a subsequent access on
seeing this flag set.

Resolves: rhbz#675944
Signed-off-by: Abhi Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 720c1e6..59aaaa0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -317,6 +317,7 @@ enum {
 	QDF_USER		= 0,
 	QDF_CHANGE		= 1,
 	QDF_LOCKED		= 2,
+	QDF_REFRESH		= 3,
 };
 
 struct gfs2_quota_data {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6ec964c..e23d986 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 			goto out_end_trans;
 
 		do_qc(qd, -qd->qd_change_sync);
+		set_bit(QDF_REFRESH, &qd->qd_flags);
 	}
 
 	error = 0;
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = ip->i_alloc;
+	struct gfs2_quota_data *qd;
 	unsigned int x;
 	int error = 0;
 
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 	     sort_qd, NULL);
 
 	for (x = 0; x < al->al_qd_num; x++) {
-		error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
+		int force = NO_FORCE;
+		qd = al->al_qd[x];
+		if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
+			force = FORCE;
+		error = do_glock(qd, force, &al->al_qd_ghs[x]);
 		if (error)
 			break;
 	}
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 08/15] GFS2: Fix glock deallocation race
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (6 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 07/15] GFS2: quota allows exceeding hard limit Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 09/15] GFS2: Remove potential race in flock code Steven Whitehouse
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch fixes a race in deallocating glocks which was introduced
in the RCU glock patch. We need to ensure that the glock count is
kept correct even in the case that there is a race to add a new
glock into the hash table. Also, to avoid having to wait for an
RCU grace period, the glock counter can be decremented before
call_rcu() is called.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddc3e1e..3f45a14 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -103,16 +103,21 @@ static inline void spin_unlock_bucket(unsigned int hash)
 	__bit_spin_unlock(0, (unsigned long *)bl);
 }
 
-void gfs2_glock_free(struct rcu_head *rcu)
+static void gfs2_glock_dealloc(struct rcu_head *rcu)
 {
 	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
-	struct gfs2_sbd *sdp = gl->gl_sbd;
 
 	if (gl->gl_ops->go_flags & GLOF_ASPACE)
 		kmem_cache_free(gfs2_glock_aspace_cachep, gl);
 	else
 		kmem_cache_free(gfs2_glock_cachep, gl);
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_sbd;
 
+	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
 	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
 		wake_up(&sdp->sd_glock_wait);
 }
@@ -760,6 +765,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	if (tmp) {
 		spin_unlock_bucket(hash);
 		kmem_cache_free(cachep, gl);
+		atomic_dec(&sdp->sd_glock_disposal);
 		gl = tmp;
 	} else {
 		hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index afa8bfe..aea1606 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -230,7 +230,7 @@ extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
 extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
 extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
-extern void gfs2_glock_free(struct rcu_head *rcu);
+extern void gfs2_glock_free(struct gfs2_glock *gl);
 
 extern int __init gfs2_glock_init(void);
 extern void gfs2_glock_exit(void);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index c80485c..98c80d8 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -30,7 +30,7 @@ static void gdlm_ast(void *arg)
 
 	switch (gl->gl_lksb.sb_status) {
 	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
-		call_rcu(&gl->gl_rcu, gfs2_glock_free);
+		gfs2_glock_free(gl);
 		return;
 	case -DLM_ECANCEL: /* Cancel while getting lock */
 		ret |= LM_OUT_CANCELED;
@@ -165,7 +165,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 	int error;
 
 	if (gl->gl_lksb.sb_lkid == 0) {
-		call_rcu(&gl->gl_rcu, gfs2_glock_free);
+		gfs2_glock_free(gl);
 		return;
 	}
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a39c103..67654d0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -928,14 +928,9 @@ static const match_table_t nolock_tokens = {
 	{ Opt_err, NULL },
 };
 
-static void nolock_put_lock(struct gfs2_glock *gl)
-{
-	call_rcu(&gl->gl_rcu, gfs2_glock_free);
-}
-
 static const struct lm_lockops nolock_ops = {
 	.lm_proto_name = "lock_nolock",
-	.lm_put_lock = nolock_put_lock,
+	.lm_put_lock = gfs2_glock_free,
 	.lm_tokens = &nolock_tokens,
 };
 
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 09/15] GFS2: Remove potential race in flock code
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (7 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 08/15] GFS2: Fix glock deallocation race Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 10/15] GFS2: Optimize glock multiple-dequeue code Steven Whitehouse
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch ensures that we always wait for glock demotion when
dropping flocks on a file in order to prevent any race
conditions associated with further flock calls or closing
the file.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 216ad27..2878481 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -981,8 +981,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
 
 	mutex_lock(&fp->f_fl_mutex);
 	flock_lock_file_wait(file, fl);
-	if (fl_gh->gh_gl)
-		gfs2_glock_dq_uninit(fl_gh);
+	if (fl_gh->gh_gl) {
+		gfs2_glock_dq_wait(fl_gh);
+		gfs2_holder_uninit(fl_gh);
+	}
 	mutex_unlock(&fp->f_fl_mutex);
 }
 
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 10/15] GFS2: Optimize glock multiple-dequeue code
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (8 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 09/15] GFS2: Remove potential race in flock code Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 11/15] GFS2: fix block allocation check for fallocate Steven Whitehouse
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Bob Peterson <rpeterso@redhat.com>

This is a small patch that optimizes multiple glock dequeue
operations.  It changes the unlock order to be more efficient
and makes it easier for lock debugging tools to unravel.  It
also eliminates the need for the temp variable x, although
that would likely be optimized out.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3f45a14..8648409 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1248,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 
 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 {
-	unsigned int x;
-
-	for (x = 0; x < num_gh; x++)
-		gfs2_glock_dq(&ghs[x]);
+	while (num_gh--)
+		gfs2_glock_dq(&ghs[num_gh]);
 }
 
 /**
@@ -1263,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 
 void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
 {
-	unsigned int x;
-
-	for (x = 0; x < num_gh; x++)
-		gfs2_glock_dq_uninit(&ghs[x]);
+	while (num_gh--)
+		gfs2_glock_dq_uninit(&ghs[num_gh]);
 }
 
 void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 11/15] GFS2: fix block allocation check for fallocate
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (9 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 10/15] GFS2: Optimize glock multiple-dequeue code Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 12/15] GFS2: introduce AIL lock Steven Whitehouse
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Benjamin Marzinski <bmarzins@redhat.com>

GFS2 fallocate wasn't properly checking if blocks were already allocated.
In write_empty_blocks(), if a page didn't have buffer_heads attached, GFS2
was always treating it as if there were no blocks allocated for that page.
GFS2 now calls gfs2_block_map() to check if the blocks are allocated before
writing them out.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 2878481..4074b95 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -622,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from,
 {
 	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
 
-	page_zero_new_buffers(page, from, to);
-	flush_dcache_page(page);
+	zero_user(page, from, to-from);
 	mark_page_accessed(page);
 
 	if (!gfs2_is_writeback(ip))
@@ -632,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from,
 	block_commit_write(page, from, to);
 }
 
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+static int needs_empty_write(sector_t block, struct inode *inode)
 {
-	unsigned start, end, next;
-	struct buffer_head *bh, *head;
 	int error;
+	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
 
-	if (!page_has_buffers(page)) {
-		error = __block_write_begin(page, from, to - from, gfs2_block_map);
-		if (unlikely(error))
-			return error;
+	bh_map.b_size = 1 << inode->i_blkbits;
+	error = gfs2_block_map(inode, block, &bh_map, 0);
+	if (unlikely(error))
+		return error;
+	return !buffer_mapped(&bh_map);
+}
 
-		empty_write_end(page, from, to);
-		return 0;
-	}
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	unsigned start, end, next, blksize;
+	sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	int ret;
 
-	bh = head = page_buffers(page);
+	blksize = 1 << inode->i_blkbits;
 	next = end = 0;
 	while (next < from) {
-		next += bh->b_size;
-		bh = bh->b_this_page;
+		next += blksize;
+		block++;
 	}
 	start = next;
 	do {
-		next += bh->b_size;
-		if (buffer_mapped(bh)) {
+		next += blksize;
+		ret = needs_empty_write(block, inode);
+		if (unlikely(ret < 0))
+			return ret;
+		if (ret == 0) {
 			if (end) {
-				error = __block_write_begin(page, start, end - start,
-							    gfs2_block_map);
-				if (unlikely(error))
-					return error;
+				ret = __block_write_begin(page, start, end - start,
+							  gfs2_block_map);
+				if (unlikely(ret))
+					return ret;
 				empty_write_end(page, start, end);
 				end = 0;
 			}
@@ -669,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
 		}
 		else
 			end = next;
-		bh = bh->b_this_page;
+		block++;
 	} while (next < to);
 
 	if (end) {
-		error = __block_write_begin(page, start, end - start, gfs2_block_map);
-		if (unlikely(error))
-			return error;
+		ret = __block_write_begin(page, start, end - start, gfs2_block_map);
+		if (unlikely(ret))
+			return ret;
 		empty_write_end(page, start, end);
 	}
 
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 12/15] GFS2: introduce AIL lock
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (10 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 11/15] GFS2: fix block allocation check for fallocate Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 13/15] GFS2: Update to AIL list locking Steven Whitehouse
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Dave Chinner <dchinner@redhat.com>

The log lock is currently used to protect the AIL lists and
the movements of buffers into and out of them. The lists
are self contained and no log specific items outside the
lists are accessed when starting or emptying the AIL lists.

Hence the operation of the AIL does not require the protection
of the log lock so split them out into a new AIL specific lock
to reduce the amount of traffic on the log lock. This will
also reduce the amount of serialisation that occurs when
the gfs2_logd pushes on the AIL to move it forward.

This reduces the impact of log pushing on sequential write
throughput.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index ac5fac9..3754e3c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 	BUG_ON(current->journal_info);
 	current->journal_info = &tr;
 
-	gfs2_log_lock(sdp);
+	spin_lock(&sdp->sd_ail_lock);
 	while (!list_empty(head)) {
 		bd = list_entry(head->next, struct gfs2_bufdata,
 				bd_ail_gl_list);
 		bh = bd->bd_bh;
 		gfs2_remove_from_ail(bd);
+		spin_unlock(&sdp->sd_ail_lock);
+
 		bd->bd_bh = NULL;
 		bh->b_private = NULL;
 		bd->bd_blkno = bh->b_blocknr;
+		gfs2_log_lock(sdp);
 		gfs2_assert_withdraw(sdp, !buffer_busy(bh));
 		gfs2_trans_add_revoke(sdp, bd);
+		gfs2_log_unlock(sdp);
+
+		spin_lock(&sdp->sd_ail_lock);
 	}
 	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
-	gfs2_log_unlock(sdp);
+	spin_unlock(&sdp->sd_ail_lock);
 
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 59aaaa0..870a89d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -651,6 +651,7 @@ struct gfs2_sbd {
 	unsigned int sd_log_flush_head;
 	u64 sd_log_flush_wrapped;
 
+	spinlock_t sd_ail_lock;
 	struct list_head sd_ail1_list;
 	struct list_head sd_ail2_list;
 	u64 sd_ail_sync_gen;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f35..4e3c044 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
  */
 
 static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
-__releases(&sdp->sd_log_lock)
-__acquires(&sdp->sd_log_lock)
+__releases(&sdp->sd_ail_lock)
+__acquires(&sdp->sd_ail_lock)
 {
 	struct gfs2_bufdata *bd, *s;
 	struct buffer_head *bh;
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock)
 			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
 
 			get_bh(bh);
-			gfs2_log_unlock(sdp);
+			spin_unlock(&sdp->sd_ail_lock);
 			lock_buffer(bh);
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock)
 				unlock_buffer(bh);
 				brelse(bh);
 			}
-			gfs2_log_lock(sdp);
+			spin_lock(&sdp->sd_ail_lock);
 
 			retry = 1;
 			break;
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
 	struct gfs2_ail *ai;
 	int done = 0;
 
-	gfs2_log_lock(sdp);
+	spin_lock(&sdp->sd_ail_lock);
 	head = &sdp->sd_ail1_list;
 	if (list_empty(head)) {
-		gfs2_log_unlock(sdp);
+		spin_unlock(&sdp->sd_ail_lock);
 		return;
 	}
 	sync_gen = sdp->sd_ail_sync_gen++;
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
 			if (ai->ai_sync_gen >= sync_gen)
 				continue;
 			ai->ai_sync_gen = sync_gen;
-			gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */
+			gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
 			done = 0;
 			break;
 		}
 	}
 
-	gfs2_log_unlock(sdp);
+	spin_unlock(&sdp->sd_ail_lock);
 }
 
 static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
 	struct gfs2_ail *ai, *s;
 	int ret;
 
-	gfs2_log_lock(sdp);
+	spin_lock(&sdp->sd_ail_lock);
 
 	list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
 		if (gfs2_ail1_empty_one(sdp, ai, flags))
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
 
 	ret = list_empty(&sdp->sd_ail1_list);
 
-	gfs2_log_unlock(sdp);
+	spin_unlock(&sdp->sd_ail_lock);
 
 	return ret;
 }
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 	int wrap = (new_tail < old_tail);
 	int a, b, rm;
 
-	gfs2_log_lock(sdp);
+	spin_lock(&sdp->sd_ail_lock);
 
 	list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
 		a = (old_tail <= ai->ai_first);
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 		kfree(ai);
 	}
 
-	gfs2_log_unlock(sdp);
+	spin_unlock(&sdp->sd_ail_lock);
 }
 
 /**
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
 	struct gfs2_ail *ai;
 	unsigned int tail;
 
-	gfs2_log_lock(sdp);
+	spin_lock(&sdp->sd_ail_lock);
 
 	if (list_empty(&sdp->sd_ail1_list)) {
 		tail = sdp->sd_log_head;
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
 		tail = ai->ai_first;
 	}
 
-	gfs2_log_unlock(sdp);
+	spin_unlock(&sdp->sd_ail_lock);
 
 	return tail;
 }
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	sdp->sd_log_commited_databuf = 0;
 	sdp->sd_log_commited_revoke = 0;
 
+	spin_lock(&sdp->sd_ail_lock);
 	if (!list_empty(&ai->ai_ail1_list)) {
 		list_add(&ai->ai_list, &sdp->sd_ail1_list);
 		ai = NULL;
 	}
+	spin_unlock(&sdp->sd_ail_lock);
 	gfs2_log_unlock(sdp);
 	trace_gfs2_log_flush(sdp, 0);
 	up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 11a73ef..4295a6a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -80,7 +80,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	mark_buffer_dirty(bh);
 	clear_buffer_pinned(bh);
 
-	gfs2_log_lock(sdp);
+	spin_lock(&sdp->sd_ail_lock);
 	if (bd->bd_ail) {
 		list_del(&bd->bd_ail_st_list);
 		brelse(bh);
@@ -91,10 +91,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	}
 	bd->bd_ail = ai;
 	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
+	spin_unlock(&sdp->sd_ail_lock);
+
 	if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
 		gfs2_glock_schedule_for_reclaim(bd->bd_gl);
 	trace_gfs2_pin(bd, 0);
-	gfs2_log_unlock(sdp);
 	unlock_buffer(bh);
 	atomic_dec(&sdp->sd_log_pinned);
 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 67654d0..42ef243 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 	init_waitqueue_head(&sdp->sd_log_waitq);
 	init_waitqueue_head(&sdp->sd_logd_waitq);
+	spin_lock_init(&sdp->sd_ail_lock);
 	INIT_LIST_HEAD(&sdp->sd_ail1_list);
 	INIT_LIST_HEAD(&sdp->sd_ail2_list);
 
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 13/15] GFS2: Update to AIL list locking
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (11 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 12/15] GFS2: introduce AIL lock Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 14/15] GFS2: Adding missing unlock_page() Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 15/15] GFS2: Don't use _raw version of RCU dereference Steven Whitehouse
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

The previous patch missed a couple of places where the AIL list
needed locking, so this fixes up those places, plus a comment
is corrected too.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Dave Chinner <dchinner@redhat.com>

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 4e3c044..e7ed31f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
  * @mapping: The associated mapping (maybe NULL)
  * @bd: The gfs2_bufdata to remove
  *
- * The log lock _must_ be held when calling this function
+ * The ail lock _must_ be held when calling this function
  *
  */
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4295a6a..e919abf 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 	/* If this buffer is in the AIL and it has already been written
 	 * to in-place disk block, remove it from the AIL.
 	 */
+	spin_lock(&sdp->sd_ail_lock);
 	if (bd->bd_ail)
 		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
+	spin_unlock(&sdp->sd_ail_lock);
 	get_bh(bh);
 	atomic_inc(&sdp->sd_log_pinned);
 	trace_gfs2_pin(bd, 1);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c..01d97f4 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
 		brelse(bh);
 	}
 	if (bd) {
+		spin_lock(&sdp->sd_ail_lock);
 		if (bd->bd_ail) {
 			gfs2_remove_from_ail(bd);
 			bh->b_private = NULL;
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
 			bd->bd_blkno = bh->b_blocknr;
 			gfs2_trans_add_revoke(sdp, bd);
 		}
+		spin_unlock(&sdp->sd_ail_lock);
 	}
 	clear_buffer_dirty(bh);
 	clear_buffer_uptodate(bh);
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 14/15] GFS2: Adding missing unlock_page()
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (12 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 13/15] GFS2: Update to AIL list locking Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 15/15] GFS2: Don't use _raw version of RCU dereference Steven Whitehouse
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: Maxim <maxim.patlasov@gmail.com>

gfs2_write_begin() calls grab_cache_page_write_begin(), which returns a *locked*
page. The corresponding error-handling path lacks an unlock_page() call:

> out:
> 	if (error == 0)
> 		return 0;
>
> 	page_cache_release(page);

The whole system hangs if gfs2_unstuff_dinode(), called from gfs2_write_begin(),
fails for some reason.

Reported-by: Maxim <maxim.patlasov@gmail.com>
Signed-off-by: Maxim <maxim.patlasov@gmail.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f88..aad77e4 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -695,6 +695,7 @@ out:
 	if (error == 0)
 		return 0;
 
+	unlock_page(page);
 	page_cache_release(page);
 
 	gfs2_trans_end(sdp);
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] [PATCH 15/15] GFS2: Don't use _raw version of RCU dereference
  2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
                   ` (13 preceding siblings ...)
  2011-03-15  9:11 ` [Cluster-devel] [PATCH 14/15] GFS2: Adding missing unlock_page() Steven Whitehouse
@ 2011-03-15  9:11 ` Steven Whitehouse
  14 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-03-15  9:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

As per RCU glock patch review comments, don't use the _raw
version of this function here.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8648409..85044b4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1723,7 +1723,7 @@ static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
 
 static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
 {
-	return hlist_bl_entry(rcu_dereference_raw(gl->gl_list.next),
+	return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
 			      struct gfs2_glock, gl_list);
 }
 
-- 
1.7.4



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2011-05-19  8:46 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-05-19  8:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

This time, most of the GFS2 patches are code clean up, although there
are a few bug fixes (fallocate/ail writeback/end of life inodes/nlink) and
some new features (new tracepoint & tracing flags, using the UUID field
in the generic superblock).

The changes can be broadly divided into three sets:

1. Bob's directory code clean up
2. My fsync/ail writeback fixes & clean up
3. inode.c/ops_inode.c clean up

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2011-07-22  9:16 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-07-22  9:16 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Not a lot new this time... the addition of a cache for the directory hash table
improves directory read/lookup speed, automatic adjustment of the glock hold
time improves performance for some contention corner cases. S_NOSEC support
is another performance related change, plus a nice clean up from Eric
Sandeen,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2011-10-24 12:48 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2011-10-24 12:48 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Since the merge window is upon us, here is the current content of
the GFS2 git tree. A few things will be held back to the following
merge window in order to ensure a greater test time, but those currently
in the tree are ready for the current window.

Recently I've reconstituted the GFS2 git tree, so it can be pulled
(via http) from:

http://sucs.org/~rohan/git/gfs2-3.0-nmw

and viewed via gitweb at:

http://sucs.org/gitweb/

This is thanks to the Swansea University Computer Society for providing
a temporary (or possibly permanent) home for the GFS2 git trees. Please
treat their server kindly as this will only continue while it doesn't
generate too much traffic. I figure that there will not be too many
people pulling the GFS2 tree at once, but we'll see.

Some highlights of the current patch set:
 o Reduction in code of approx 400 lines
 o Big clean up (and speed up) in the resource group code
   - This is a nice base to build some forthcoming improvements on
   - It should improve performance with multi-threaded workloads
 o Some left-over fsync/writeback changes
 o Improvements to readahead when deallocating large directories

Any questions/concerns then please let me know as usual,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2012-01-05 11:51 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-01-05 11:51 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

The main feature this time is clean up around the allocation and
resource group code. Otherwise the remainder is mostly small
bug fixes.

I've held back the glock stats patch and that will probably be
ready for the following merge window with a bit of luck,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2012-03-19 10:25 Steven Whitehouse
       [not found] ` <4F674696.7030602@xenotime.net>
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Whitehouse @ 2012-03-19 10:25 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Not a huge number of patches this time. Some notable new features
though:
 - Glock stats gathering (v. useful for performance analysis)
 - FITRIM ioctl support
 - Sorting the ordered write list (big performance increase when the workload
   doesn't result in the write requests being nicely ordered to start with)

Plus a few clean ups, and bug fixes in addition,

Steve.




^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
       [not found] ` <4F674696.7030602@xenotime.net>
@ 2012-03-19 14:59   ` Steven Whitehouse
       [not found]     ` <4F674E4F.5080904@xenotime.net>
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Whitehouse @ 2012-03-19 14:59 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Mon, 2012-03-19 at 07:45 -0700, Randy Dunlap wrote:
> On 03/19/2012 03:25 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > Not a huge number of patches this time. Some notable new features
> > though:
> >  - Glock stats gathering (v. useful for performance analysis)
> >  - FITRIM ioctl support
> >  - Sorting the ordered write list (big performance increase when the workload
> >    doesn't result in the write requests being nicely ordered to start with)
> > 
> > Plus a few clean ups, and bug fixes in addition,
> 
> 
> 
> Hi,
> 
> I reported a build error in linux-next 20120313, but it appears
> that mainline also needs the fix (when it's ready) since mainline
> gfs2 Kconfig selects DLM_SCTP, which does not exist.
> 
> https://lkml.org/lkml/2012/3/13/456
> 

Does the following fix the problem? If so then I'll roll that into the
tree before it gets pushed,

Steve.

diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index c465ae0..f4e1c60 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,7 +4,7 @@ config GFS2_FS
 	select DLM if GFS2_FS_LOCKING_DLM
 	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
 	select SYSFS if GFS2_FS_LOCKING_DLM
-	select IP_SCTP if DLM_SCTP
+	select IP_SCTP if GFS2_FS_LOCKING_DLM
 	select FS_POSIX_ACL
 	select CRC32
 	select QUOTACTL




^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
       [not found]     ` <4F674E4F.5080904@xenotime.net>
@ 2012-03-19 15:34       ` Steven Whitehouse
  2012-03-23 19:41         ` David Teigland
  2012-03-20  9:47       ` Steven Whitehouse
  1 sibling, 1 reply; 44+ messages in thread
From: Steven Whitehouse @ 2012-03-19 15:34 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Mon, 2012-03-19 at 08:18 -0700, Randy Dunlap wrote:
> On 03/19/2012 07:59 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > On Mon, 2012-03-19 at 07:45 -0700, Randy Dunlap wrote:
> >> On 03/19/2012 03:25 AM, Steven Whitehouse wrote:
> >>
> >>> Hi,
> >>>
> >>> Not a huge number of patches this time. Some notable new features
> >>> though:
> >>>  - Glock stats gathering (v. useful for performance analysis)
> >>>  - FITRIM ioctl support
> >>>  - Sorting the ordered write list (big performance increase when the workload
> >>>    doesn't result in the write requests being nicely ordered to start with)
> >>>
> >>> Plus a few clean ups, and bug fixes in addition,
> >>
> >>
> >>
> >> Hi,
> >>
> >> I reported a build error in linux-next 20120313, but it appears
> >> that mainline also needs the fix (when it's ready) since mainline
> >> gfs2 Kconfig selects DLM_SCTP, which does not exist.
> >>
> >> https://lkml.org/lkml/2012/3/13/456
> >>
> > 
> > Does the following fix the problem? If so then I'll roll that into the
> > tree before it gets pushed,
> > 
> 
> No, that's not sufficient:
> 
> warning: (GFS2_FS) selects DLM which has unmet direct dependencies (EXPERIMENTAL && INET && SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n))
> warning: (DLM && GFS2_FS) selects IP_SCTP which has unmet direct dependencies (NET && INET && EXPERIMENTAL && (IPV6 || IPV6=n))
> 
> and
> 
> ERROR: "crc32c" [net/sctp/sctp.ko] undefined!
> 
> 
Hmm, ok. I'll look at this again. I'm not sure why DLM is still calling
itself EXPERIMENTAL since that's long since not been the case, maybe SCTP
still is, but I don't think GFS2 should be selecting EXPERIMENTAL
directly, anyway. It is rather easy to tie oneself in knots with this
config language.... since GFS2_FS_LOCKING_DLM depends on NET && INET &&
(IPV6 || IPV6=n) && HOTPLUG then all those other deps must presumably be
set anyway, so I don't understand quite why DLM doesn't have those
available to it.

I'll dig around a bit and see if I can figure out what's going on here,

Steve.


> 
> 
> > 
> > diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> > index c465ae0..f4e1c60 100644
> > --- a/fs/gfs2/Kconfig
> > +++ b/fs/gfs2/Kconfig
> > @@ -4,7 +4,7 @@ config GFS2_FS
> >  	select DLM if GFS2_FS_LOCKING_DLM
> >  	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >  	select SYSFS if GFS2_FS_LOCKING_DLM
> > -	select IP_SCTP if DLM_SCTP
> > +	select IP_SCTP if GFS2_FS_LOCKING_DLM
> >  	select FS_POSIX_ACL
> >  	select CRC32
> >  	select QUOTACTL
> > 
> > 
> 
> 
> 




^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
       [not found]     ` <4F674E4F.5080904@xenotime.net>
  2012-03-19 15:34       ` Steven Whitehouse
@ 2012-03-20  9:47       ` Steven Whitehouse
  1 sibling, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-03-20  9:47 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Mon, 2012-03-19 at 08:18 -0700, Randy Dunlap wrote:
> On 03/19/2012 07:59 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > On Mon, 2012-03-19 at 07:45 -0700, Randy Dunlap wrote:
> >> On 03/19/2012 03:25 AM, Steven Whitehouse wrote:
> >>
> >>> Hi,
> >>>
> >>> Not a huge number of patches this time. Some notable new features
> >>> though:
> >>>  - Glock stats gathering (v. useful for performance analysis)
> >>>  - FITRIM ioctl support
> >>>  - Sorting the ordered write list (big performance increase when the workload
> >>>    doesn't result in the write requests being nicely ordered to start with)
> >>>
> >>> Plus a few clean ups, and bug fixes in addition,
> >>
> >>
> >>
> >> Hi,
> >>
> >> I reported a build error in linux-next 20120313, but it appears
> >> that mainline also needs the fix (when it's ready) since mainline
> >> gfs2 Kconfig selects DLM_SCTP, which does not exist.
> >>
> >> https://lkml.org/lkml/2012/3/13/456
> >>
> > 
> > Does the following fix the problem? If so then I'll roll that into the
> > tree before it gets pushed,
> > 
> 
> No, that's not sufficient:
> 
> warning: (GFS2_FS) selects DLM which has unmet direct dependencies (EXPERIMENTAL && INET && SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n))
> warning: (DLM && GFS2_FS) selects IP_SCTP which has unmet direct dependencies (NET && INET && EXPERIMENTAL && (IPV6 || IPV6=n))
> 
> and
> 
> ERROR: "crc32c" [net/sctp/sctp.ko] undefined!
> 
> 
Since the pending patch set doesn't affect the Kconfig at all, I don't
think that this issue needs to hold up merging the GFS2 tree. We'll
follow up with a fix for this later on,

Steve.




^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
  2012-03-19 15:34       ` Steven Whitehouse
@ 2012-03-23 19:41         ` David Teigland
  2012-03-23 19:46           ` David Miller
       [not found]           ` <4F6CD7AD.9030306@xenotime.net>
  0 siblings, 2 replies; 44+ messages in thread
From: David Teigland @ 2012-03-23 19:41 UTC (permalink / raw)
  To: cluster-devel.redhat.com


> on i386:
>
> ERROR: "sctp_do_peeloff" [fs/dlm/dlm.ko] undefined!
>
>
> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
> used anywhere else in the kernel tree AFAICT.
> DLM just always selects IP_SCTP.

Here's what we have now:

config GFS2_FS
        tristate "GFS2 file system support"
        depends on (64BIT || LBDAF)
        select DLM if GFS2_FS_LOCKING_DLM
        select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
        select SYSFS if GFS2_FS_LOCKING_DLM
        select IP_SCTP if DLM_SCTP
        select FS_POSIX_ACL
        select CRC32
        select QUOTACTL

menuconfig DLM
        tristate "Distributed Lock Manager (DLM)"
        depends on EXPERIMENTAL && INET
        depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
        select IP_SCTP

Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
possibly be undefined if we're selecting SCTP.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
  2012-03-23 19:41         ` David Teigland
@ 2012-03-23 19:46           ` David Miller
       [not found]           ` <4F6CD7AD.9030306@xenotime.net>
  1 sibling, 0 replies; 44+ messages in thread
From: David Miller @ 2012-03-23 19:46 UTC (permalink / raw)
  To: cluster-devel.redhat.com

From: David Teigland <teigland@redhat.com>
Date: Fri, 23 Mar 2012 15:41:52 -0400

> Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> possibly be undefined if we're selecting SCTP.

GFS2=y SCTP=m



^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
       [not found]           ` <4F6CD7AD.9030306@xenotime.net>
@ 2012-03-23 20:09             ` Steven Whitehouse
  2012-03-23 20:18             ` David Teigland
  1 sibling, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-03-23 20:09 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, 2012-03-23 at 13:06 -0700, Randy Dunlap wrote:
> On 03/23/2012 12:41 PM, David Teigland wrote:
> 
> > 
> >> on i386:
> >>
> >> ERROR: "sctp_do_peeloff" [fs/dlm/dlm.ko] undefined!
> >>
> >>
> >> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
> >> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
> >> used anywhere else in the kernel tree AFAICT.
> >> DLM just always selects IP_SCTP.
> > 
> > Here's what we have now:
> > 
> > config GFS2_FS
> >         tristate "GFS2 file system support"
> >         depends on (64BIT || LBDAF)
> >         select DLM if GFS2_FS_LOCKING_DLM
> >         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >         select SYSFS if GFS2_FS_LOCKING_DLM
> >         select IP_SCTP if DLM_SCTP
> >         select FS_POSIX_ACL
> >         select CRC32
> >         select QUOTACTL
> > 
> > menuconfig DLM
> >         tristate "Distributed Lock Manager (DLM)"
> >         depends on EXPERIMENTAL && INET
> >         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
> >         select IP_SCTP
> > 
> > Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> > just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> > vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> > possibly be undefined if we're selecting SCTP.
> 
> What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
> don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
> in GFS2 is meaningless since there is no DLM_SCTP.
> 
> I just verified that the (posted) failing config still fails with
> today's linux-next.
> 

The DLM_SCTP is historical. There used to be such a thing, but that
config option went away, and there is now run time selection of the DLM
transport. So the GFS2 Kconfig should have been updated; however,
that appears not to be enough on its own to resolve the issue,

Steve.




^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
       [not found]           ` <4F6CD7AD.9030306@xenotime.net>
  2012-03-23 20:09             ` Steven Whitehouse
@ 2012-03-23 20:18             ` David Teigland
       [not found]               ` <20120323220618.GA30906@d2.synalogic.ca>
  1 sibling, 1 reply; 44+ messages in thread
From: David Teigland @ 2012-03-23 20:18 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On Fri, Mar 23, 2012 at 01:06:05PM -0700, Randy Dunlap wrote:
> >> GFS2_FS selects DLM (if GFS2_FS_LOCKING_DLM, which is enabled).
> >> GFS2_FS selects IP_SCTP if DLM_SCTP, which is not enabled and not
> >> used anywhere else in the kernel tree AFAICT.
> >> DLM just always selects IP_SCTP.
> > 
> > Here's what we have now:
> > 
> > config GFS2_FS
> >         tristate "GFS2 file system support"
> >         depends on (64BIT || LBDAF)
> >         select DLM if GFS2_FS_LOCKING_DLM
> >         select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >         select SYSFS if GFS2_FS_LOCKING_DLM
> >         select IP_SCTP if DLM_SCTP
> >         select FS_POSIX_ACL
> >         select CRC32
> >         select QUOTACTL
> > 
> > menuconfig DLM
> >         tristate "Distributed Lock Manager (DLM)"
> >         depends on EXPERIMENTAL && INET
> >         depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
> >         select IP_SCTP
> > 
> > Why does gfs2 Kconfig bother with SCTP at all?  It seems that line should
> > just be removed.  I'll also remove EXPERIMENTAL.  I don't understand the
> > vagaries of Kconfig, so a dumb question, how could sctp_do_peeloff
> > possibly be undefined if we're selecting SCTP.
> 
> What is selecting SCTP?  DLM?  so GFS2 selects DLM, but selects
> don't follow dependency chains.  Also, the "select IP_SCTP if DLM_SCTP"
> in GFS2 is meaningless since there is no DLM_SCTP.

https://lkml.org/lkml/2012/3/8/222 seems to have caused this by adding
the new dependency on the sctp module without any Kconfig changes.

Should that patch have added depends IP_SCTP to the dlm and gfs2?



^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
       [not found]               ` <20120323220618.GA30906@d2.synalogic.ca>
@ 2012-03-26 10:44                 ` Steven Whitehouse
       [not found]                   ` <4F79C733.60604@xenotime.net>
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Whitehouse @ 2012-03-26 10:44 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Fri, 2012-03-23 at 18:06 -0400, Benjamin Poirier wrote:
[snip]
> 
> Instead of trying to select everything in GFS2, how about doing it this way?
> 
> [PATCH] gfs2: use depends instead of select in kconfig
> 
> Avoids having to duplicate the dependencies of what is 'select'ed (and on
> down...)
> 
> Those dependencies are currently incomplete, leading to broken builds with
> GFS2_FS_LOCKING_DLM=y and IP_SCTP=n.
> 
> Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
> ---
>  fs/gfs2/Kconfig |    7 ++-----
>  1 files changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> index c465ae0..eb08c9e 100644
> --- a/fs/gfs2/Kconfig
> +++ b/fs/gfs2/Kconfig
> @@ -1,10 +1,6 @@
>  config GFS2_FS
>  	tristate "GFS2 file system support"
>  	depends on (64BIT || LBDAF)
> -	select DLM if GFS2_FS_LOCKING_DLM
> -	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> -	select SYSFS if GFS2_FS_LOCKING_DLM
> -	select IP_SCTP if DLM_SCTP
>  	select FS_POSIX_ACL
>  	select CRC32
>  	select QUOTACTL
> @@ -29,7 +25,8 @@ config GFS2_FS
>  
>  config GFS2_FS_LOCKING_DLM
>  	bool "GFS2 DLM locking"
> -	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
> +	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> +		HOTPLUG && DLM && CONFIGFS_FS && SYSFS
>  	help
>  	  Multiple node locking module for GFS2
>  

That looks ok to me. I've put it in the GFS2 -fixes tree, and if
everybody is happy with that I'll send a pull request shortly,

Steve.




^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
       [not found]                   ` <4F79C733.60604@xenotime.net>
@ 2012-04-02 15:47                     ` Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-04-02 15:47 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Mon, 2012-04-02 at 08:35 -0700, Randy Dunlap wrote:
> On 03/26/2012 03:44 AM, Steven Whitehouse wrote:
> 
> > Hi,
> > 
> > On Fri, 2012-03-23 at 18:06 -0400, Benjamin Poirier wrote:
> > [snip]
> >>
> >> Instead of trying to select everything in GFS2, how about doing it this way?
> >>
> >> [PATCH] gfs2: use depends instead of select in kconfig
> >>
> >> Avoids having to duplicate the dependencies of what is 'select'ed (and on
> >> down...)
> >>
> >> Those dependencies are currently incomplete, leading to broken builds with
> >> GFS2_FS_LOCKING_DLM=y and IP_SCTP=n.
> >>
> >> Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
> >> ---
> >>  fs/gfs2/Kconfig |    7 ++-----
> >>  1 files changed, 2 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
> >> index c465ae0..eb08c9e 100644
> >> --- a/fs/gfs2/Kconfig
> >> +++ b/fs/gfs2/Kconfig
> >> @@ -1,10 +1,6 @@
> >>  config GFS2_FS
> >>  	tristate "GFS2 file system support"
> >>  	depends on (64BIT || LBDAF)
> >> -	select DLM if GFS2_FS_LOCKING_DLM
> >> -	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
> >> -	select SYSFS if GFS2_FS_LOCKING_DLM
> >> -	select IP_SCTP if DLM_SCTP
> >>  	select FS_POSIX_ACL
> >>  	select CRC32
> >>  	select QUOTACTL
> >> @@ -29,7 +25,8 @@ config GFS2_FS
> >>  
> >>  config GFS2_FS_LOCKING_DLM
> >>  	bool "GFS2 DLM locking"
> >> -	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
> >> +	depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \
> >> +		HOTPLUG && DLM && CONFIGFS_FS && SYSFS
> >>  	help
> >>  	  Multiple node locking module for GFS2
> >>  
> > 
> > That looks ok to me. I've put it in the GFS2 -fixes tree, and if
> > everybody is happy with that I'll send a pull request shortly,
> 
> 
> Can we get Benjamin's patch merged, please?
> linux-next is still having build errors without it.
> 

It is in the GFS2 -nmw tree now, so it will be in linux-next shortly.
I'll merge up the -fixes tree shortly, but I'm expecting one more patch
for that very shortly,

Steve.




^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2012-05-17 12:23 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-05-17 12:23 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Since the merge window appears to be fast approaching, here are the
current GFS2 patches. This time there are two main themes, one is
updates to the log code, mostly on the writing side. The other is
preparation for some block reservation work which will probably
land in the subsequent merge window.

There is of course the usual collection of cleanup and bug fixes
as well. See the individual patches for the detailed descriptions,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2012-07-23  8:00 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-07-23  8:00 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

As usual, here is the content of the GFS2 tree prior to sending
a merge request. Not a huge number of patches this time, but some
interesting features nonetheless.

A number of the earlier patches are aimed at cleaning up the resource
group code for the later patch which implements block reservations.
In addition to that, there are a few patches aimed at improving
the time taken to dump (the potentially rather large) glock debugfs
file. Beyond that there are a couple of bug fixes and that's about it
this time,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2012-09-26  8:25 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-09-26  8:25 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

We've collected up a goodly number of patches in the -nmw tree now
and we can hold off any further changes until the following merge
window, so here is the current tree content.

The major feature this time is the "rbm" conversion in the resource
group code. The new struct gfs2_rbm specifies the location of an
allocatable block in (resource group, bitmap, offset) form. There
are a number of added helper functions, and later patches then
rewrite some of the resource group code in terms of this new
structure. Not only does this give us a nice code clean up, but
it also removes some of the previous restrictions where extents
could not cross bitmap boundaries, for example.

In addition to that, there are a few bug fixes and clean ups, but
the rbm work is by far the majority of this patch set in terms of
number of changed lines.

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2012-11-30  9:52 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2012-11-30  9:52 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

So yes, this is a bit early, but the tree seems to have settled down
now, and I'd like to hold off any further feature patches until the
subsequent merge window at this stage.

The main feature this time is the new Orlov allocator and the patches
leading up to it which allow us to allocate new inodes from their own
allocation context, rather than borrowing that of their parent directory.
It is this change which then allows us to choose a different location
for subdirectories when required. This works exactly as per the ext3
implementation from the user's point of view.

In addition to that, we've got a speed up in gfs2_rbm_from_block()
from Bob Peterson, three locking related improvements from Dave
Teigland plus a selection of smaller bug fixes and clean ups.

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2013-02-19 10:07 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2013-02-19 10:07 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This is one of the smallest collections of patches for the merge
window for some time. There are some clean ups relating to the
transaction code and the shrinker, which are mostly in preparation
for further development, but also make the code much easier to
follow in these areas.

There is a patch which allows the use of ->writepages even in the
default ordered write mode for all writebacks. This results in
sending larger i/os to the block layer, and a subsequent increase
in performance. It also reduces the number of different i/o paths
by one.

There is also a bug fix reinstating the withdraw ack system which
somehow got lost when the lock modules were merged into GFS2.

And that's all this time around,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2013-04-26  9:18 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2013-04-26  9:18 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Since the merge window is coming up soon, I'm posting the content of
the GFS2 -nmw tree as usual. There is not a whole lot of change this
time - there are some further changes which are in the works, but those
will be held over until next time.

Here there are some clean ups to inode creation, the addition of an
origin (local or remote) indicator to glock demote requests, removal
of one of the remaining GFP_NOFAIL allocations during log flushes,
one minor clean up, and a one liner bug fix,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2013-07-01  9:33 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2013-07-01  9:33 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

There are a few bug fixes for various, mostly very minor corner
cases, plus some interesting new features. The new features
include atomic_open whose main benefit will be the reduction in
locking overhead in case of combined lookup/create and open operations,
sorting the log buffer lists by block number to improve the efficiency
of AIL writeback, and aggressively issuing revokes in gfs2_log_flush
to reduce overhead when dropping glocks,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2 Pre-pull patch posting (merge window)
@ 2013-09-05  9:02 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2013-09-05  9:02 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

This is the smallest merge window patch set for GFS2 for quite
some time. Only one of the patches (moving gfs2_sync_meta) is
a non-bug fix patch, although the merge ordered and writeback
writepage patch is also a nice clean up.

A couple of the patches are quite recently added, due to my only
having recently returned from holiday, so I'll give them a couple
of extra days in -next before sending the pull request.

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2013-11-04 11:09 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2013-11-04 11:09 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

I'm just back from firstly Edinburgh, and secondly holiday, and the
merge window is again upon us. I've added in the three pending patches
which were under test while I was away and then that should be it for
this time.

The main feature of interest this time is quota updates. There are
some clean ups and some patches to use the new generic lru list
code. There is still plenty of scope for some further changes in
due course - faster lookups of quota structures is very much
on the todo list. Also, a start has been made towards the more tricky
issue of using the generic lru code with glocks, but that will
have to be completed in a subsequent merge window.

The other, more minor feature, is that there have been a number of
performance patches which relate to block allocation. In particular
they will improve performance when the disk is nearly full,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2014-01-20 12:23 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2014-01-20 12:23 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Here are the pending patches for the merge window which are currently
in the GFS2 tree.

The main topics this time are allocation, in the form of Bob's
improvements when searching resource groups and several updates
to quotas which should increase scalability. The quota changes
follow on from those in the last merge window, and there will
likely be further work to come in this area in due course.

There are also a few patches which help to improve efficiency
of adding entries into directories, and clean up some of that
code.

One on-disk change is included this time, which is to write some
additional information which should be useful to fsck and
also potentially for debugging.

Other than that, it's just a few small random bug fixes and
clean ups,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2014-04-01  9:15 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2014-04-01  9:15 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Here is the current content of the GFS2 -nmw tree for the
current merge window.

One of the main highlights this time, is not the patches themselves
but instead the widening contributor base. It is good to see that
interest is increasing in GFS2, and I'd like to thank all the
contributors to this patch set.

In addition to the usual set of bug fixes and clean ups, there are
patches to improve inode creation performance when xattrs are required
and some improvements to the transaction code which is intended to help
improve scalability after further changes in due course. Journal extent
mapping is also updated to make it more efficient and again, this is a
foundation for future work in this area.

The maximum number of ACLs has been increased to 300 (for a 4k block size)
which means that even with a few additional xattrs from selinux,
everything should fit within a single fs block. There is also a patch
to bring GFS2's own copy of the writepages code up to the same level as
the core VFS. Eventually we may be able to merge some of this code, since
it is fairly similar.

The other major change this time, is bringing consistency to the printing
of messages via fs_<level>, pr_<level> macros. 

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2014-06-03 11:02 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2014-06-03 11:02 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

This must be about the smallest merge window patch set ever for GFS2.
It is probably also the first one without a single patch from me. That
is down to a combination of factors, and I have some things in the works
that are not quite ready yet, that I hope to put in next time around.

Returning to what is here this time... we have 3 patches which fix
various warnings. Two are bug fixes (for quotas and also a
rare recovery race condition). The final patch, from Ben Marzinski,
is an important change in the freeze code which has been in
progress for some time. This removes the need to take and drop the
transaction lock for every single transaction, when the only time it
was used, was at file system freeze time. Ben's patch integrates the
freeze operation into the journal flush code as an alternative with
lower overheads and also lands up resolving some difficult to fix races
at the same time,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2014-10-08  9:53 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2014-10-08  9:53 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

Not a huge amount this time... just four patches. This time we have a couple
of bug fixes, one relating to bad i_goal values which are now ignored (i_goal
is basically a hint so it is safe to do this) and another relating to the
saving of the dirent location during rename. There is one performance
improvement, which is an optimisation in rgblk_free so that multiple block
deallocations will now be more efficient, and one clean up patch to use
_RET_IP_ rather than writing it out longhand,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [Cluster-devel] GFS2: Pre-pull patch posting (merge window)
@ 2014-12-08 12:38 Steven Whitehouse
  0 siblings, 0 replies; 44+ messages in thread
From: Steven Whitehouse @ 2014-12-08 12:38 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

In contrast to recent merge windows, there are a number of interesting features
this time. There is a set of patches to improve performance in relation to
block reservations. Some correctness fixes for fallocate, and an update
to the freeze/thaw code which greatly simplifies this code path. In
addition there is a set of clean ups from Al Viro too,

Steve.

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2014-12-08 12:38 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-15  9:11 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 01/15] GFS2: Use RCU for glock hash table Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 02/15] GFS2: Post-VFS scale update for RCU path walk Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 03/15] GFS2: Fix glock queue trace point Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 04/15] GFS2: Improve cluster mmap scalability Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 05/15] GFS2: panics on quotacheck update Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 06/15] GFS2: deallocation performance patch Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 07/15] GFS2: quota allows exceeding hard limit Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 08/15] GFS2: Fix glock deallocation race Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 09/15] GFS2: Remove potential race in flock code Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 10/15] GFS2: Optimize glock multiple-dequeue code Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 11/15] GFS2: fix block allocation check for fallocate Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 12/15] GFS2: introduce AIL lock Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 13/15] GFS2: Update to AIL list locking Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 14/15] GFS2: Adding missing unlock_page() Steven Whitehouse
2011-03-15  9:11 ` [Cluster-devel] [PATCH 15/15] GFS2: Don't use _raw version of RCU dereference Steven Whitehouse
  -- strict thread matches above, loose matches on Subject: below --
2011-05-19  8:46 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
2011-07-22  9:16 Steven Whitehouse
2011-10-24 12:48 Steven Whitehouse
2012-01-05 11:51 Steven Whitehouse
2012-03-19 10:25 Steven Whitehouse
     [not found] ` <4F674696.7030602@xenotime.net>
2012-03-19 14:59   ` Steven Whitehouse
     [not found]     ` <4F674E4F.5080904@xenotime.net>
2012-03-19 15:34       ` Steven Whitehouse
2012-03-23 19:41         ` David Teigland
2012-03-23 19:46           ` David Miller
     [not found]           ` <4F6CD7AD.9030306@xenotime.net>
2012-03-23 20:09             ` Steven Whitehouse
2012-03-23 20:18             ` David Teigland
     [not found]               ` <20120323220618.GA30906@d2.synalogic.ca>
2012-03-26 10:44                 ` Steven Whitehouse
     [not found]                   ` <4F79C733.60604@xenotime.net>
2012-04-02 15:47                     ` Steven Whitehouse
2012-03-20  9:47       ` Steven Whitehouse
2012-05-17 12:23 Steven Whitehouse
2012-07-23  8:00 Steven Whitehouse
2012-09-26  8:25 Steven Whitehouse
2012-11-30  9:52 Steven Whitehouse
2013-02-19 10:07 Steven Whitehouse
2013-04-26  9:18 Steven Whitehouse
2013-07-01  9:33 Steven Whitehouse
2013-09-05  9:02 [Cluster-devel] GFS2 " Steven Whitehouse
2013-11-04 11:09 [Cluster-devel] GFS2: " Steven Whitehouse
2014-01-20 12:23 Steven Whitehouse
2014-04-01  9:15 Steven Whitehouse
2014-06-03 11:02 Steven Whitehouse
2014-10-08  9:53 Steven Whitehouse
2014-12-08 12:38 Steven Whitehouse

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).