All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: xfs@oss.sgi.com
Subject: Re: [PATCH 4/8] xfs: introduce xfs_rw_lock() helpers for locking the inode
Date: Wed, 12 Jan 2011 08:02:50 +1100	[thread overview]
Message-ID: <20110111210250.GL28803@dastard> (raw)
In-Reply-To: <20110111173627.GB24025@infradead.org>

On Tue, Jan 11, 2011 at 12:36:27PM -0500, Christoph Hellwig wrote:
> On Tue, Jan 11, 2011 at 10:37:44AM +1100, Dave Chinner wrote:
> > +/*
> > + * xfs_file_splice_write() does not use xfs_rw_ilock() because
> > + * generic_file_splice_write() takes the i_mutex itself. This, in theory,
> > + * couuld cause lock inversions between the aio_write path and the splice path
> > + * if someone is doing concurrent splice(2) based writes and write(2) based
> > + * writes to the same inode. The only real way to fix this is to re-implement
> > + * the generic code here with correct locking orders.
> > + */
> >  STATIC ssize_t
> >  xfs_file_splice_write(
> >  	struct pipe_inode_info	*pipe,
> > @@ -386,14 +427,13 @@ xfs_file_splice_write(
> >  	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
> >  		return -EIO;
> >  
> > -	xfs_ilock(ip, XFS_IOLOCK_EXCL);
> > +	xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL);
> >  
> >  	new_size = *ppos + count;
> >  
> > -	xfs_ilock(ip, XFS_ILOCK_EXCL);
> >  	if (new_size > ip->i_size)
> >  		ip->i_new_size = new_size;
> > -	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> > +	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
> 
> While using the xfs_rw_iunlock here actually is correct I think it's

Argh! I thought I reverted all the changes to
xfs_file_splice_write().

> highly confusing, given that we didn't use it to take the ilock.

It definitely held the ilock around that size update before this
series. ;)

> What
> about just leaving all the code in xfs_file_splice_write as-is and not
> touching it at all?

Updated version below.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

xfs: introduce xfs_rw_lock() helpers for locking the inode

From: Dave Chinner <dchinner@redhat.com>

We need to obtain the i_mutex, i_iolock and i_ilock during the read
and write paths. Add a set of wrapper functions to neatly
encapsulate the lock ordering and shared/exclusive semantics to make
the locking easier to follow and get right.

Note that this changes some of the exclusive locking serialisation in
that serialisation will occur against the i_mutex instead of the
XFS_IOLOCK_EXCL. This does not change any behaviour, and it is
arguably more efficient to use the mutex for such serialisation than
the rw_sem.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/linux-2.6/xfs_file.c |  131 ++++++++++++++++++++++++++-----------------
 1 files changed, 80 insertions(+), 51 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index c47d7dc0..b5e13fb 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -41,6 +41,40 @@
 static const struct vm_operations_struct xfs_file_vm_ops;
 
 /*
+ * Locking primitives for read and write IO paths to ensure we consistently use
+ * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
+ */
+static inline void
+xfs_rw_ilock(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_lock(&VFS_I(ip)->i_mutex);
+	xfs_ilock(ip, type);
+}
+
+static inline void
+xfs_rw_iunlock(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	xfs_iunlock(ip, type);
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+static inline void
+xfs_rw_ilock_demote(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	xfs_ilock_demote(ip, type);
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+/*
  *	xfs_iozero
  *
  *	xfs_iozero clears the specified range of buffer supplied,
@@ -262,22 +296,21 @@ xfs_file_aio_read(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if (unlikely(ioflags & IO_ISDIRECT))
-		mutex_lock(&inode->i_mutex);
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
 	if (unlikely(ioflags & IO_ISDIRECT)) {
+		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+
 		if (inode->i_mapping->nrpages) {
 			ret = -xfs_flushinval_pages(ip,
 					(iocb->ki_pos & PAGE_CACHE_MASK),
 					-1, FI_REMAPF_LOCKED);
+			if (ret) {
+				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+				return ret;
+			}
 		}
-		mutex_unlock(&inode->i_mutex);
-		if (ret) {
-			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-			return ret;
-		}
-	}
+		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+	} else
+		xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 
 	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
 
@@ -285,7 +318,7 @@ xfs_file_aio_read(
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -309,7 +342,7 @@ xfs_file_splice_read(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 
 	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
 
@@ -317,7 +350,7 @@ xfs_file_splice_read(
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -338,10 +371,10 @@ xfs_aio_write_isize_update(
 		*ppos = isize;
 
 	if (*ppos > ip->i_size) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
 		if (*ppos > ip->i_size)
 			ip->i_size = *ppos;
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
@@ -356,14 +389,22 @@ xfs_aio_write_newsize_update(
 	struct xfs_inode	*ip)
 {
 	if (ip->i_new_size) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
 		ip->i_new_size = 0;
 		if (ip->i_d.di_size > ip->i_size)
 			ip->i_d.di_size = ip->i_size;
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
+/*
+ * xfs_file_splice_write() does not use xfs_rw_ilock() because
+ * generic_file_splice_write() takes the i_mutex itself. This, in theory,
+ * couuld cause lock inversions between the aio_write path and the splice path
+ * if someone is doing concurrent splice(2) based writes and write(2) based
+ * writes to the same inode. The only real way to fix this is to re-implement
+ * the generic code here with correct locking orders.
+ */
 STATIC ssize_t
 xfs_file_splice_write(
 	struct pipe_inode_info	*pipe,
@@ -604,7 +645,6 @@ xfs_file_aio_write(
 	xfs_fsize_t		new_size;
 	int			iolock;
 	size_t			ocount = 0, count;
-	int			need_i_mutex;
 
 	XFS_STATS_INC(xs_write_calls);
 
@@ -631,21 +671,17 @@ xfs_file_aio_write(
 relock:
 	if (ioflags & IO_ISDIRECT) {
 		iolock = XFS_IOLOCK_SHARED;
-		need_i_mutex = 0;
 	} else {
 		iolock = XFS_IOLOCK_EXCL;
-		need_i_mutex = 1;
-		mutex_lock(&inode->i_mutex);
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
-
 start:
+	xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock);
 	ret = generic_write_checks(file, &pos, &count,
 					S_ISBLK(inode->i_mode));
 	if (ret) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
-		goto out_unlock_mutex;
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+		return ret;
 	}
 
 	if (ioflags & IO_ISDIRECT) {
@@ -654,16 +690,20 @@ start:
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 
 		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
 			return XFS_ERROR(-EINVAL);
 		}
 
-		if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
+		/*
+		 * For direct I/O, if there are cached pages or we're extending
+		 * the file, we need IOLOCK_EXCL until we're sure the bytes at
+		 * the new EOF have been zeroed and/or the cached pages are
+		 * flushed out.  Upgrade the I/O lock and start again.
+		 */
+		if (iolock != XFS_IOLOCK_EXCL &&
+		    (mapping->nrpages || pos > ip->i_size)) {
+			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
 			iolock = XFS_IOLOCK_EXCL;
-			need_i_mutex = 1;
-			mutex_lock(&inode->i_mutex);
-			xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
 			goto start;
 		}
 	}
@@ -687,11 +727,11 @@ start:
 	if (pos > ip->i_size) {
 		ret = -xfs_zero_eof(ip, pos, ip->i_size);
 		if (ret) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 			goto out_unlock_internal;
 		}
 	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
 
 	/*
 	 * If we're writing the file then make sure to clear the
@@ -708,7 +748,7 @@ start:
 
 	if ((ioflags & IO_ISDIRECT)) {
 		if (mapping->nrpages) {
-			WARN_ON(need_i_mutex == 0);
+			WARN_ON(iolock != XFS_IOLOCK_EXCL);
 			ret = -xfs_flushinval_pages(ip,
 					(pos & PAGE_CACHE_MASK),
 					-1, FI_REMAPF_LOCKED);
@@ -716,13 +756,10 @@ start:
 				goto out_unlock_internal;
 		}
 
-		if (need_i_mutex) {
+		if (iolock == XFS_IOLOCK_EXCL) {
 			/* demote the lock now the cached pages are gone */
-			xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
-			mutex_unlock(&inode->i_mutex);
-
+			xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 			iolock = XFS_IOLOCK_SHARED;
-			need_i_mutex = 0;
 		}
 
 		trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
@@ -740,7 +777,7 @@ start:
 			count -= ret;
 
 			ioflags &= ~IO_ISDIRECT;
-			xfs_iunlock(ip, iolock);
+			xfs_rw_iunlock(ip, iolock);
 			goto relock;
 		}
 	} else {
@@ -775,14 +812,9 @@ write_retry:
 		loff_t end = pos + ret - 1;
 		int error, error2;
 
-		xfs_iunlock(ip, iolock);
-		if (need_i_mutex)
-			mutex_unlock(&inode->i_mutex);
-
+		xfs_rw_iunlock(ip, iolock);
 		error = filemap_write_and_wait_range(mapping, pos, end);
-		if (need_i_mutex)
-			mutex_lock(&inode->i_mutex);
-		xfs_ilock(ip, iolock);
+		xfs_rw_ilock(ip, iolock);
 
 		error2 = -xfs_file_fsync(file,
 					 (file->f_flags & __O_SYNC) ? 0 : 1);
@@ -794,10 +826,7 @@ write_retry:
 
  out_unlock_internal:
 	xfs_aio_write_newsize_update(ip);
-	xfs_iunlock(ip, iolock);
- out_unlock_mutex:
-	if (need_i_mutex)
-		mutex_unlock(&inode->i_mutex);
+	xfs_rw_iunlock(ip, iolock);
 	return ret;
 }
 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  reply	other threads:[~2011-01-11 21:00 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-01-10 23:37 [PATCH 0/8] xfs: prevent corruption due to overlapping AIO DIO V4 Dave Chinner
2011-01-10 23:37 ` [PATCH 1/8] xfs: ensure sync write errors are returned Dave Chinner
2011-01-10 23:37 ` [PATCH 2/8] xfs: factor common post-write isize handling code Dave Chinner
2011-01-10 23:37 ` [PATCH 4/8] xfs: introduce xfs_rw_lock() helpers for locking the inode Dave Chinner
2011-01-11 17:36   ` Christoph Hellwig
2011-01-11 21:02     ` Dave Chinner [this message]
2011-01-11 21:03       ` Christoph Hellwig
2011-01-11 21:36       ` Alex Elder
2011-01-10 23:37 ` [PATCH 5/8] xfs: split direct IO write path from xfs_file_aio_write Dave Chinner
2011-01-11 21:44   ` Alex Elder
2011-01-10 23:37 ` [PATCH 6/8] xfs: split buffered " Dave Chinner
2011-01-10 23:37 ` [PATCH 7/8] xfs: factor common write setup code Dave Chinner
2011-01-10 23:37 ` [PATCH 8/8] xfs: serialise unaligned direct IOs Dave Chinner
  -- strict thread matches above, loose matches on Subject: below --
2011-01-07 11:30 [PATCH 0/8] xfs: prevent corruption due to overlapping AIO DIO V3 Dave Chinner
2011-01-07 11:30 ` [PATCH 4/8] xfs: introduce xfs_rw_lock() helpers for locking the inode Dave Chinner
2011-01-10 19:23   ` Christoph Hellwig
2011-01-10 22:26     ` Dave Chinner
2011-01-04  4:48 [PATCH 0/8] xfs: prevent corruption due to overlapping AIO DIO V2 Dave Chinner
2011-01-04  4:48 ` [PATCH 4/8] xfs: introduce xfs_rw_lock() helpers for locking the inode Dave Chinner
2011-01-05  1:54   ` Alex Elder
2011-01-05  7:55     ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110111210250.GL28803@dastard \
    --to=david@fromorbit.com \
    --cc=hch@infradead.org \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.