cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Bob Peterson <rpeterso@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH 04/11] gfs2: don't try to add buffers to transactions a second time for jdata
Date: Fri, 24 Jul 2020 13:32:57 -0500	[thread overview]
Message-ID: <20200724183304.366913-5-rpeterso@redhat.com> (raw)
In-Reply-To: <20200724183304.366913-1-rpeterso@redhat.com>

When jdata buffers are written by user-space, the iomap code adds the
buffers to the appropriate transaction's databufs queue. Then they get
pinned, and unpinned and make their way to the ail1 list. But then
gfs2_ail1_start gets called, resulting in this calling sequence:

gfs2_ail1_start(sdp);
   gfs2_ail1_flush(sdp, &wbc); wbc has for_sync=1
      list_for_each_entry_reverse(tr, head, tr_list) {
         ret = gfs2_ail1_start_one(sdp, wbc, tr);
            ret = generic_writepages(mapping, wbc);
               write_cache_pages
                  __writepage
                     gfs2_jdata_writepage
                        __gfs2_jdata_writepage

At this point, __gfs2_jdata_writepage is trying to write the in-place
jdata buffers, but ends up calling gfs2_page_add_databufs which calls
gfs2_trans_add_data which, of course, requires a transaction. But the
gfs2_ail1_flush calling sequence is for flushing transactions, so it
does not have a transaction. In fact, it should just be writing the
jdata buffers in-place and shouldn't need a transaction.

This got us into strange situations where it thought it needed a
transaction when it didn't, and infinite loops because it thought
the buffer was PageChecked (i.e. re-dirtied by a competing write op)
so it just kept waiting for it to not be PageChecked each time.

To fix this, this patch introduces a new buffer_head flag "needs_trans"
to indicate when a buffer has been dirtied by a real write as opposed
to just an in-place write for the ail list.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/aops.c   | 37 +++++++++++++++++++++++--------------
 fs/gfs2/bmap.c   |  1 +
 fs/gfs2/incore.h |  3 +++
 fs/gfs2/lops.c   |  1 +
 fs/gfs2/trans.c  |  2 ++
 5 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ff2ec416f106..a9af28abe468 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -158,14 +158,29 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 
-	if (PageChecked(page)) {
-		ClearPageChecked(page);
-		if (!page_has_buffers(page)) {
-			create_empty_buffers(page, inode->i_sb->s_blocksize,
-					     BIT(BH_Dirty)|BIT(BH_Uptodate));
+	if (!PageChecked(page))
+		goto write;
+
+	ClearPageChecked(page);
+	if (page_has_buffers(page)) {
+		struct buffer_head *bh, *head;
+
+		bh = head = page_buffers(page);
+		do {
+			if (buffer_needstrans(bh))
+				break;
+			bh = bh->b_this_page;
+		} while (bh != head);
+		if (buffer_needstrans(bh)) {
+			BUG_ON(!current->journal_info);
+			gfs2_page_add_databufs(ip, page, 0,
+					       sdp->sd_vfs->s_blocksize);
 		}
-		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
+	} else {
+		create_empty_buffers(page, inode->i_sb->s_blocksize,
+				     BIT(BH_Dirty)|BIT(BH_Uptodate));
 	}
+write:
 	return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
 }
 
@@ -184,15 +199,9 @@ static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 
-	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
-		goto out;
-	if (PageChecked(page) || current->journal_info)
-		goto out_ignore;
-	return __gfs2_jdata_writepage(page, wbc);
+	if (!gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
+		return __gfs2_jdata_writepage(page, wbc);
 
-out_ignore:
-	redirty_page_for_writepage(wbc, page);
-out:
 	unlock_page(page);
 	return 0;
 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6306eaae378b..85cf903011d7 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -94,6 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 		gfs2_trans_add_data(ip->i_gl, bh);
 	else {
 		mark_buffer_dirty(bh);
+		set_buffer_needstrans(bh);
 		gfs2_ordered_add_inode(ip);
 	}
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index ca2ec02436ec..f510126efc1c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -149,12 +149,15 @@ static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
 enum gfs2_state_bits {
 	BH_Pinned = BH_PrivateStart,
 	BH_Escaped = BH_PrivateStart + 1,
+	BH_Needstrans = BH_PrivateStart + 2,
 };
 
 BUFFER_FNS(Pinned, pinned)
 TAS_BUFFER_FNS(Pinned, pinned)
 BUFFER_FNS(Escaped, escaped)
 TAS_BUFFER_FNS(Escaped, escaped)
+BUFFER_FNS(Needstrans, needstrans)
+TAS_BUFFER_FNS(Needstrans, needstrans)
 
 struct gfs2_bufdata {
 	struct buffer_head *bd_bh;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index cb2a11b458c6..6f74fc43e9c2 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -104,6 +104,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 	BUG_ON(!buffer_pinned(bh));
 
 	lock_buffer(bh);
+	clear_buffer_needstrans(bh);
 	mark_buffer_dirty(bh);
 	clear_buffer_pinned(bh);
 
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index a3dfa3aa87ad..897d08e2a6b0 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -130,6 +130,7 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
 	bd->bd_bh = bh;
 	bd->bd_gl = gl;
 	INIT_LIST_HEAD(&bd->bd_list);
+	INIT_LIST_HEAD(&bd->bd_ail_st_list);
 	bh->b_private = bd;
 	return bd;
 }
@@ -155,6 +156,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
 	struct gfs2_bufdata *bd;
 
 	lock_buffer(bh);
+	clear_buffer_needstrans(bh);
 	if (buffer_pinned(bh)) {
 		set_bit(TR_TOUCHED, &tr->tr_flags);
 		goto out;
-- 
2.26.2



  parent reply	other threads:[~2020-07-24 18:32 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-24 18:32 [Cluster-devel] [GFS2 PATCH 00/11] gfs2: jdata patch collection Bob Peterson
2020-07-24 18:32 ` [Cluster-devel] [GFS2 PATCH 01/11] gfs2: inline gfs2_write_jdata_pagevec into gfs2_write_cache_jdata Bob Peterson
2020-07-24 18:32 ` [Cluster-devel] [GFS2 PATCH 02/11] gfs2: don't break integrity writeback on __gfs2_jdata_writepage error Bob Peterson
2020-08-03 17:54   ` Andreas Gruenbacher
2020-07-24 18:32 ` [Cluster-devel] [GFS2 PATCH 03/11] gfs2: Fix inaccurate comment Bob Peterson
2020-07-24 18:32 ` Bob Peterson [this message]
2020-08-03 17:52   ` [Cluster-devel] [GFS2 PATCH 04/11] gfs2: don't try to add buffers to transactions a second time for jdata Andreas Gruenbacher
2020-07-24 18:32 ` [Cluster-devel] [GFS2 PATCH 05/11] gfs2: Wipe jdata and ail1 in gfs2_journal_wipe, formerly gfs2_meta_wipe Bob Peterson
2020-07-24 18:32 ` [Cluster-devel] [GFS2 PATCH 06/11] gfs2: rename gfs2_write_full_page to gfs2_write_jdata_page, remove parm Bob Peterson
2020-07-24 18:33 ` [Cluster-devel] [GFS2 PATCH 07/11] gfs2: Add a new jdata-specific version of gfs2_get_block_noalloc Bob Peterson
2020-07-24 18:33 ` [Cluster-devel] [GFS2 PATCH 08/11] gfs2: Add caller info to log_blocks trace point Bob Peterson
2020-07-24 18:33 ` [Cluster-devel] [GFS2 PATCH 09/11] gfs2: enhance log_blocks trace point to show log blocks free Bob Peterson
2020-07-24 18:33 ` [Cluster-devel] [GFS2 PATCH 10/11] gfs2: print details on transactions that aren't properly ended Bob Peterson
2020-07-24 18:33 ` [Cluster-devel] [GFS2 PATCH 11/11] gfs2: Never call gfs2_block_zero_range with an open transaction Bob Peterson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200724183304.366913-5-rpeterso@redhat.com \
    --to=rpeterso@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).