From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 5/8] xfs: split direct IO write path from xfs_file_aio_write
Date: Tue, 4 Jan 2011 15:48:35 +1100 [thread overview]
Message-ID: <1294116518-14908-6-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1294116518-14908-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The current xfs_file_aio_write code is a mess of locking shenanigans
to handle the different locking requirements of buffered and direct
IO. Start to clean this up by disentangling the direct IO path from
the mess.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_file.c | 168 ++++++++++++++++++++++++++++---------------
1 files changed, 110 insertions(+), 58 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 0d6111e..d546953 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -619,6 +619,110 @@ out_lock:
return error;
}
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the tricky to
+ * follow locking changes and looping. This also clearly indicates that XFS
+ * does not fall back to buffered IO in the direct IO write path.
+ *
+ * Returns with locks held indicated by @iolock and errors indicated by
+ * negative return values.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+ struct kiocb *iocb,
+ const struct iovec *iovp,
+ unsigned long nr_segs,
+ loff_t pos,
+ size_t ocount,
+ int *iolock)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ ssize_t ret = 0;
+ xfs_fsize_t new_size;
+ size_t count = ocount;
+ xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+
+ *iolock = 0;
+ if ((pos & target->bt_smask) || (count & target->bt_smask))
+ return -XFS_ERROR(EINVAL);
+
+ if (mapping->nrpages || pos > ip->i_size)
+ *iolock = XFS_IOLOCK_EXCL;
+ else
+ *iolock = XFS_IOLOCK_SHARED;
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL|*iolock);
+
+ ret = generic_write_checks(file, &pos, &count,
+ S_ISBLK(inode->i_mode));
+ if (ret) {
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|*iolock);
+ *iolock = 0;
+ return ret;
+ }
+
+ new_size = pos + count;
+ if (new_size > ip->i_size)
+ ip->i_new_size = new_size;
+
+ if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+ file_update_time(file);
+
+ /*
+ * If the offset is beyond the size of the file, we have a couple of
+ * things to do. First, if there is already space allocated we need to
+ * either create holes or zero the disk or ...
+ *
+ * If there is a page where the previous size lands, we need to zero it
+ * out up to the new size.
+ */
+ if (pos > ip->i_size) {
+ ret = -xfs_zero_eof(ip, pos, ip->i_size);
+ if (ret) {
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+ return ret;
+ }
+ }
+ xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * If we're writing the file then make sure to clear the setuid and
+ * setgid bits if the process is not being run by root. This keeps
+ * people from modifying setuid and setgid binaries.
+ */
+ ret = file_remove_suid(file);
+ if (unlikely(ret))
+ return ret;
+
+ if (mapping->nrpages) {
+ WARN_ON(*iolock != XFS_IOLOCK_EXCL);
+ ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+ FI_REMAPF_LOCKED);
+ if (ret)
+ return ret;
+ }
+
+ if (*iolock == XFS_IOLOCK_EXCL) {
+ /* demote the lock now the cached pages are gone */
+ xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+ *iolock = XFS_IOLOCK_SHARED;
+ }
+
+ trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+ ret = generic_file_direct_write(iocb, iovp,
+ &nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+ /* No fallback to buffered IO on errors for XFS. */
+ return ret;
+}
+
STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
@@ -659,14 +763,13 @@ xfs_file_aio_write(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
-relock:
if (ioflags & IO_ISDIRECT) {
- iolock = XFS_IOLOCK_SHARED;
- } else {
- iolock = XFS_IOLOCK_EXCL;
+ ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+ ocount, &iolock);
+ goto done_io;
}
-start:
+ iolock = XFS_IOLOCK_EXCL;
ret = generic_write_checks(file, &pos, &count,
S_ISBLK(inode->i_mode));
if (ret) {
@@ -674,24 +777,6 @@ start:
return ret;
}
- if (ioflags & IO_ISDIRECT) {
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- if ((pos & target->bt_smask) || (count & target->bt_smask)) {
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
- return XFS_ERROR(-EINVAL);
- }
-
- if (iolock != XFS_IOLOCK_EXCL &&
- (mapping->nrpages || pos > ip->i_size)) {
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
- iolock = XFS_IOLOCK_EXCL;
- goto start;
- }
- }
-
new_size = pos + count;
if (new_size > ip->i_size)
ip->i_new_size = new_size;
@@ -730,41 +815,7 @@ start:
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
- if ((ioflags & IO_ISDIRECT)) {
- if (mapping->nrpages) {
- WARN_ON(iolock != XFS_IOLOCK_EXCL);
- ret = -xfs_flushinval_pages(ip,
- (pos & PAGE_CACHE_MASK),
- -1, FI_REMAPF_LOCKED);
- if (ret)
- goto out_unlock_internal;
- }
-
- if (iolock == XFS_IOLOCK_EXCL) {
- /* demote the lock now the cached pages are gone */
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- iolock = XFS_IOLOCK_SHARED;
- }
-
- trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
- ret = generic_file_direct_write(iocb, iovp,
- &nr_segs, pos, &iocb->ki_pos, count, ocount);
-
- /*
- * direct-io write to a hole: fall through to buffered I/O
- * for completing the rest of the request.
- */
- if (ret >= 0 && ret != count) {
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- pos += ret;
- count -= ret;
-
- ioflags &= ~IO_ISDIRECT;
- xfs_rw_iunlock(ip, iolock);
- goto relock;
- }
- } else {
+ if (!(ioflags & IO_ISDIRECT)) {
int enospc = 0;
write_retry:
@@ -788,6 +839,7 @@ write_retry:
current->backing_dev_info = NULL;
+done_io:
xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
if (ret <= 0)
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2011-01-04 4:46 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-01-04 4:48 [PATCH 0/8] xfs: prevent corruption due to overlapping AIO DIO V2 Dave Chinner
2011-01-04 4:48 ` [PATCH 1/8] xfs: ensure sync write errors are returned Dave Chinner
2011-01-05 1:53 ` Alex Elder
2011-01-07 8:45 ` Christoph Hellwig
2011-01-07 9:07 ` Dave Chinner
2011-01-04 4:48 ` [PATCH 2/8] xfs: factor common post-write isize handling code Dave Chinner
2011-01-05 1:54 ` Alex Elder
2011-01-04 4:48 ` [PATCH 4/8] xfs: introduce xfs_rw_lock() helpers for locking the inode Dave Chinner
2011-01-05 1:54 ` Alex Elder
2011-01-05 7:55 ` Dave Chinner
2011-01-04 4:48 ` Dave Chinner [this message]
2011-01-05 1:54 ` [PATCH 5/8] xfs: split direct IO write path from xfs_file_aio_write Alex Elder
2011-01-05 7:36 ` Dave Chinner
2011-01-07 8:58 ` Christoph Hellwig
2011-01-07 9:21 ` Dave Chinner
2011-01-04 4:48 ` [PATCH 6/8] xfs: split buffered " Dave Chinner
2011-01-05 1:55 ` Alex Elder
2011-01-04 4:48 ` [PATCH 7/8] xfs: factor common write setup code Dave Chinner
2011-01-05 1:55 ` Alex Elder
2011-01-07 8:53 ` Christoph Hellwig
2011-01-07 9:20 ` Dave Chinner
2011-01-04 4:48 ` [PATCH 8/8] xfs: serialise unaligned direct IOs Dave Chinner
2011-01-05 1:55 ` Alex Elder
2011-01-05 1:53 ` [PATCH 0/8] xfs: prevent corruption due to overlapping AIO DIO V2 Alex Elder
-- strict thread matches above, loose matches on Subject: below --
2011-01-07 11:30 [PATCH 0/8] xfs: prevent corruption due to overlapping AIO DIO V3 Dave Chinner
2011-01-07 11:30 ` [PATCH 5/8] xfs: split direct IO write path from xfs_file_aio_write Dave Chinner
2011-01-10 19:24 ` Christoph Hellwig
2011-01-10 23:37 [PATCH 0/8] xfs: prevent corruption due to overlapping AIO DIO V4 Dave Chinner
2011-01-10 23:37 ` [PATCH 5/8] xfs: split direct IO write path from xfs_file_aio_write Dave Chinner
2011-01-11 21:44 ` Alex Elder
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1294116518-14908-6-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox