All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 4/7] xfs: split direct IO write path from xfs_file_aio_write
Date: Wed, 15 Dec 2010 12:23:25 +1100	[thread overview]
Message-ID: <1292376208-16282-5-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1292376208-16282-1-git-send-email-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

The current xfs_file_aio_write code is a mess of locking shenanigans
to handle the different locking requirements of buffered and direct
IO. Start to clean this up by disentangling the direct IO path from
the mess.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/linux-2.6/xfs_file.c |  184 +++++++++++++++++++++++++++---------------
 1 files changed, 118 insertions(+), 66 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 4608aab..d70a249 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -587,6 +587,116 @@ out_lock:
 	return error;
 }
 
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the tricky to
+ * follow locking changes and looping. This also clearly indicates that XFS
+ * does not fall back to buffered IO in the direct IO write path.
+ *
+ * Returns with locks held indicated by @need_i_mutex and @iolock.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+	struct kiocb		*iocb,
+	const struct iovec	*iovp,
+	unsigned long		nr_segs,
+	loff_t			pos,
+	size_t			ocount,
+	int			*need_i_mutex,
+	int			*iolock)
+{
+	struct file		*file = iocb->ki_filp;
+	struct address_space	*mapping = file->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			error = 0;
+	xfs_fsize_t		new_size;
+	size_t			count = ocount;
+	xfs_buftarg_t		*target = XFS_IS_REALTIME_INODE(ip) ?
+					mp->m_rtdev_targp : mp->m_ddev_targp;
+
+	*need_i_mutex = 0;
+	*iolock = 0;
+	if ((pos & target->bt_smask) || (count & target->bt_smask))
+		return XFS_ERROR(-EINVAL);
+
+	if (mapping->nrpages || pos > ip->i_size) {
+		*iolock = XFS_IOLOCK_EXCL;
+		*need_i_mutex = 1;
+		mutex_lock(&inode->i_mutex);
+	} else {
+		*iolock = XFS_IOLOCK_SHARED;
+	}
+	xfs_ilock(ip, XFS_ILOCK_EXCL|*iolock);
+
+	error = generic_write_checks(file, &pos, &count,
+					S_ISBLK(inode->i_mode));
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL|*iolock);
+		*iolock = 0;
+		return error;
+	}
+
+	new_size = pos + count;
+	if (new_size > ip->i_size)
+		ip->i_new_size = new_size;
+
+	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+		file_update_time(file);
+
+	/*
+	 * If the offset is beyond the size of the file, we have a couple of
+	 * things to do. First, if there is already space allocated we need to
+	 * either create holes or zero the disk or ...
+	 *
+	 * If there is a page where the previous size lands, we need to zero it
+	 * out up to the new size.
+	 */
+	if (pos > ip->i_size) {
+		error = -xfs_zero_eof(ip, pos, ip->i_size);
+		if (error) {
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			return error;
+		}
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * If we're writing the file then make sure to clear the setuid and
+	 * setgid bits if the process is not being run by root.  This keeps
+	 * people from modifying setuid and setgid binaries.
+	 */
+	error = file_remove_suid(file);
+	if (unlikely(error))
+		return error;
+
+	if (mapping->nrpages) {
+		WARN_ON(*need_i_mutex == 0);
+		error = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+							FI_REMAPF_LOCKED);
+		if (error)
+			return error;
+	}
+
+	if (*need_i_mutex) {
+		/* demote the lock now the cached pages are gone */
+		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		mutex_unlock(&inode->i_mutex);
+		*iolock = XFS_IOLOCK_SHARED;
+		*need_i_mutex = 0;
+	}
+
+	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+	error = generic_file_direct_write(iocb, iovp,
+			&nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+	/* No fallback to buffered IO on errors for XFS. */
+	return error;
+}
+
 STATIC ssize_t
 xfs_file_aio_write(
 	struct kiocb		*iocb,
@@ -628,19 +738,17 @@ xfs_file_aio_write(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-relock:
 	if (ioflags & IO_ISDIRECT) {
-		iolock = XFS_IOLOCK_SHARED;
-		need_i_mutex = 0;
-	} else {
-		iolock = XFS_IOLOCK_EXCL;
-		need_i_mutex = 1;
-		mutex_lock(&inode->i_mutex);
+		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+					ocount, &need_i_mutex, &iolock);
+		goto done_io;
 	}
 
+	iolock = XFS_IOLOCK_EXCL;
+	need_i_mutex = 1;
+	mutex_lock(&inode->i_mutex);
 	xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
 
-start:
 	error = -generic_write_checks(file, &pos, &count,
 					S_ISBLK(inode->i_mode));
 	if (error) {
@@ -648,26 +756,6 @@ start:
 		goto out_unlock_mutex;
 	}
 
-	if (ioflags & IO_ISDIRECT) {
-		xfs_buftarg_t	*target =
-			XFS_IS_REALTIME_INODE(ip) ?
-				mp->m_rtdev_targp : mp->m_ddev_targp;
-
-		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
-			return XFS_ERROR(-EINVAL);
-		}
-
-		if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
-			iolock = XFS_IOLOCK_EXCL;
-			need_i_mutex = 1;
-			mutex_lock(&inode->i_mutex);
-			xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
-			goto start;
-		}
-	}
-
 	new_size = pos + count;
 	if (new_size > ip->i_size)
 		ip->i_new_size = new_size;
@@ -706,44 +794,7 @@ start:
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
 
-	if ((ioflags & IO_ISDIRECT)) {
-		if (mapping->nrpages) {
-			WARN_ON(need_i_mutex == 0);
-			error = xfs_flushinval_pages(ip,
-					(pos & PAGE_CACHE_MASK),
-					-1, FI_REMAPF_LOCKED);
-			if (error)
-				goto out_unlock_internal;
-		}
-
-		if (need_i_mutex) {
-			/* demote the lock now the cached pages are gone */
-			xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
-			mutex_unlock(&inode->i_mutex);
-
-			iolock = XFS_IOLOCK_SHARED;
-			need_i_mutex = 0;
-		}
-
-		trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
-		ret = generic_file_direct_write(iocb, iovp,
-				&nr_segs, pos, &iocb->ki_pos, count, ocount);
-
-		/*
-		 * direct-io write to a hole: fall through to buffered I/O
-		 * for completing the rest of the request.
-		 */
-		if (ret >= 0 && ret != count) {
-			XFS_STATS_ADD(xs_write_bytes, ret);
-
-			pos += ret;
-			count -= ret;
-
-			ioflags &= ~IO_ISDIRECT;
-			xfs_iunlock(ip, iolock);
-			goto relock;
-		}
-	} else {
+	if (!(ioflags & IO_ISDIRECT)) {
 		int enospc = 0;
 		ssize_t ret2 = 0;
 
@@ -767,6 +818,7 @@ write_retry:
 
 	current->backing_dev_info = NULL;
 
+done_io:
 	xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
 
 	error = -ret;
-- 
1.7.2.3

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2010-12-15  1:23 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-15  1:23 [PATCH 0/7] xfs: serialise unaligned direct IOs Dave Chinner
2010-12-15  1:23 ` [PATCH 1/7] xfs: ensure sync write errors are returned Dave Chinner
2010-12-16 11:57   ` Christoph Hellwig
2010-12-16 21:57     ` Dave Chinner
2010-12-16 20:50   ` Alex Elder
2010-12-17  6:43     ` Dave Chinner
2010-12-15  1:23 ` [PATCH 2/7] xfs: factor common post-write isize handling code Dave Chinner
2010-12-15  1:23 ` Dave Chinner [this message]
2010-12-16 12:06   ` [PATCH 4/7] xfs: split direct IO write path from xfs_file_aio_write Christoph Hellwig
2010-12-17  7:31     ` Dave Chinner
2010-12-20 11:29       ` Christoph Hellwig
2010-12-21  0:51         ` Dave Chinner
2010-12-15  1:23 ` [PATCH 5/7] xfs: split buffered " Dave Chinner
2010-12-15  1:23 ` [PATCH 6/7] xfs: factor common write setup code Dave Chinner
2010-12-15  1:23 ` [PATCH 7/7] xfs: serialise unaligned direct IOs Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1292376208-16282-5-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.