From mboxrd@z Thu Jan 1 00:00:00 1970 From: Abhijith Das Date: Wed, 7 Sep 2011 08:37:48 -0400 (EDT) Subject: [Cluster-devel] GFS2: Fix AIL flush issue during fsync In-Reply-To: <1315385189.2718.7.camel@menhir> Message-ID: <1232775770.552718.1315399068762.JavaMail.root@zmail05.collab.prod.int.phx2.redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit This looks good to me. I've run iozone multiple times over nfs with this patch without causing gfs2 to crash. Cheers! --Abhi ----- Original Message ----- > From: "Steven Whitehouse" > To: cluster-devel at redhat.com > Cc: "Abhijith Das" > Sent: Wednesday, September 7, 2011 3:46:29 AM > Subject: GFS2: Fix AIL flush issue during fsync > Unfortunately, it is not enough to just ignore locked buffers during > the AIL flush from fsync. We need to be able to ignore all buffers > which are locked, dirty or pinned at this stage as they might have > been added subsequent to the log flush earlier in the fsync function. > > In addition, this means that we no longer need to rely on i_mutex to > keep out writes during fsync, so we can, as a side-effect, remove > that protection too. > > Signed-off-by: Steven Whitehouse > Tested-By: Abhijith Das > > diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c > index 3467f36..3b65f67 100644 > --- a/fs/gfs2/file.c > +++ b/fs/gfs2/file.c > @@ -593,16 +593,12 @@ static int gfs2_fsync(struct file *file, loff_t > start, loff_t end, > sync_state &= ~I_DIRTY_SYNC; > > if (sync_state) { > - mutex_lock(&inode->i_mutex); > ret = sync_inode_metadata(inode, 1); > - if (ret) { > - mutex_unlock(&inode->i_mutex); > + if (ret) > return ret; > - } > if (gfs2_is_jdata(ip)) > filemap_write_and_wait(mapping); > - gfs2_ail_flush(ip->i_gl); > - mutex_unlock(&inode->i_mutex); > + gfs2_ail_flush(ip->i_gl, 1); > } > > if (mapping->nrpages) > diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c > index 951541b..78418b4 100644 > --- a/fs/gfs2/glops.c > +++ b/fs/gfs2/glops.c > @@ -42,41 +42,41 @@ static void gfs2_ail_error(struct gfs2_glock *gl, > const struct buffer_head *bh) > /** > * __gfs2_ail_flush - remove all buffers for a given lock from the AIL > * @gl: the glock > + * @fsync: set when called from fsync (not all buffers will be clean) > * > * None of the buffers should be dirty, locked, or pinned. > */ > > -static void __gfs2_ail_flush(struct gfs2_glock *gl, unsigned long > b_state) > +static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) > { > struct gfs2_sbd *sdp = gl->gl_sbd; > struct list_head *head = &gl->gl_ail_list; > - struct gfs2_bufdata *bd; > + struct gfs2_bufdata *bd, *tmp; > struct buffer_head *bh; > + const unsigned long b_state = (1UL << BH_Dirty)|(1UL << > BH_Pinned)|(1UL << BH_Lock); > sector_t blocknr; > > + gfs2_log_lock(sdp); > spin_lock(&sdp->sd_ail_lock); > - while (!list_empty(head)) { > - bd = list_entry(head->next, struct gfs2_bufdata, > - bd_ail_gl_list); > + list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) { > bh = bd->bd_bh; > - blocknr = bh->b_blocknr; > - if (bh->b_state & b_state) > + if (bh->b_state & b_state) { > + if (fsync) > + continue; > gfs2_ail_error(gl, bh); > + } > + blocknr = bh->b_blocknr; > bh->b_private = NULL; > gfs2_remove_from_ail(bd); /* drops ref on bh */ > - spin_unlock(&sdp->sd_ail_lock); > > bd->bd_bh = NULL; > bd->bd_blkno = blocknr; > > - gfs2_log_lock(sdp); > gfs2_trans_add_revoke(sdp, bd); > - gfs2_log_unlock(sdp); > - > - spin_lock(&sdp->sd_ail_lock); > } > - gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); > + BUG_ON(!fsync && atomic_read(&gl->gl_ail_count)); > spin_unlock(&sdp->sd_ail_lock); > + gfs2_log_unlock(sdp); > } > > > @@ -99,13 +99,13 @@ static void gfs2_ail_empty_gl(struct gfs2_glock > *gl) > BUG_ON(current->journal_info); > current->journal_info = &tr; > > - __gfs2_ail_flush(gl, (1ul << BH_Dirty)|(1ul << BH_Pinned)|(1ul << > BH_Lock)); > + __gfs2_ail_flush(gl, 0); > > gfs2_trans_end(sdp); > gfs2_log_flush(sdp, NULL); > } > > -void gfs2_ail_flush(struct gfs2_glock *gl) > +void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) > { > struct gfs2_sbd *sdp = gl->gl_sbd; > unsigned int revokes = atomic_read(&gl->gl_ail_count); > @@ -117,7 +117,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl) > ret = gfs2_trans_begin(sdp, 0, revokes); > if (ret) > return; > - __gfs2_ail_flush(gl, (1ul << BH_Dirty)|(1ul << BH_Pinned)); > + __gfs2_ail_flush(gl, fsync); > gfs2_trans_end(sdp); > gfs2_log_flush(sdp, NULL); > } > diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h > index 6fce409..bf95a2d 100644 > --- a/fs/gfs2/glops.h > +++ b/fs/gfs2/glops.h > @@ -23,6 +23,6 @@ extern const struct gfs2_glock_operations > gfs2_quota_glops; > extern const struct gfs2_glock_operations gfs2_journal_glops; > extern const struct gfs2_glock_operations *gfs2_glops_list[]; > > -extern void gfs2_ail_flush(struct gfs2_glock *gl); > +extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); > > #endif /* __GLOPS_DOT_H__ */ > diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c > index 87e9141..71e4209 100644 > --- a/fs/gfs2/super.c > +++ b/fs/gfs2/super.c > @@ -1533,7 +1533,7 @@ static void gfs2_evict_inode(struct inode > *inode) > out_truncate: > gfs2_log_flush(sdp, ip->i_gl); > write_inode_now(inode, 1); > - gfs2_ail_flush(ip->i_gl); > + gfs2_ail_flush(ip->i_gl, 0); > > /* Case 2 starts here */ > error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);