From: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
To: "J. Bruce Fields" <bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
Cc: Jeff Layton <jlayton-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org>,
linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
xfs-VZNHf3L845pBDgjK7y7TUQ@public.gmane.org
Subject: [PATCH 19/20] xfs: implement pNFS export operations
Date: Thu, 22 Jan 2015 12:10:05 +0100 [thread overview]
Message-ID: <1421925006-24231-20-git-send-email-hch@lst.de> (raw)
In-Reply-To: <1421925006-24231-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
Add operations to export pNFS block layouts from an XFS filesystem. See
the previous commit adding the operations for an explanation of them.
Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
fs/xfs/Makefile | 1 +
fs/xfs/xfs_export.c | 6 ++
fs/xfs/xfs_fsops.c | 2 +
fs/xfs/xfs_iops.c | 2 +-
fs/xfs/xfs_iops.h | 1 +
fs/xfs/xfs_mount.h | 11 +++
fs/xfs/xfs_pnfs.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_pnfs.h | 11 +++
8 files changed, 276 insertions(+), 1 deletion(-)
create mode 100644 fs/xfs/xfs_pnfs.c
create mode 100644 fs/xfs/xfs_pnfs.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d617999..df68285 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -121,3 +121,4 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_PROC_FS) += xfs_stats.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
+xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 5eb4a14..b97359b 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -30,6 +30,7 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
+#include "xfs_pnfs.h"
/*
* Note that we only accept fileids which are long enough rather than allow
@@ -245,4 +246,9 @@ const struct export_operations xfs_export_operations = {
.fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent,
.commit_metadata = xfs_fs_nfs_commit_metadata,
+#ifdef CONFIG_NFSD_PNFS
+ .get_uuid = xfs_fs_get_uuid,
+ .map_blocks = xfs_fs_map_blocks,
+ .commit_blocks = xfs_fs_commit_blocks,
+#endif
};
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 74c6211..99465ba 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -602,6 +602,8 @@ xfs_growfs_data(
if (!mutex_trylock(&mp->m_growlock))
return -EWOULDBLOCK;
error = xfs_growfs_data_private(mp, in);
+ if (!error)
+ mp->m_generation++;
mutex_unlock(&mp->m_growlock);
return error;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index c50311c..6ff84e8 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -496,7 +496,7 @@ xfs_setattr_mode(
inode->i_mode |= mode & ~S_IFMT;
}
-static void
+void
xfs_setattr_time(
struct xfs_inode *ip,
struct iattr *iattr)
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 1c34e43..ea7a98e 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -32,6 +32,7 @@ extern void xfs_setup_inode(struct xfs_inode *);
*/
#define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */
+extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
int flags);
extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 22ccf69..12925d5 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -175,6 +175,17 @@ typedef struct xfs_mount {
struct workqueue_struct *m_reclaim_workqueue;
struct workqueue_struct *m_log_workqueue;
struct workqueue_struct *m_eofblocks_workqueue;
+
+ /*
+ * Generation of the filesystem layout. This is incremented by each
+ * growfs, and used by the pNFS server to ensure the client updates
+ * its view of the block device once it gets a layout that might
+ * reference the newly added blocks. Does not need to be persistent
+ * as long as we only allow file system size increments, but if we
+ * ever support shrinks it would have to be persisted in addition
+ * to various other kinds of pain inflicted on the pNFS server.
+ */
+ __uint32_t m_generation;
} xfs_mount_t;
/*
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
new file mode 100644
index 0000000..5d25f5d
--- /dev/null
+++ b/fs/xfs/xfs_pnfs.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2014 Christoph Hellwig.
+ */
+#include "xfs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_log.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_error.h"
+#include "xfs_iomap.h"
+#include "xfs_shared.h"
+#include "xfs_pnfs.h"
+
+/*
+ * Hand the filesystem UUID to the caller.
+ *
+ * On entry *len is the capacity of buf.  If it is smaller than a uuid_t
+ * nothing is written and -EINVAL is returned.  Otherwise mp->m_sb.sb_uuid
+ * is copied into buf, *len is set to sizeof(uuid_t), *offset is set to the
+ * byte offset of sb_uuid within struct xfs_dsb, and 0 is returned.
+ */
+int
+xfs_fs_get_uuid(
+	struct super_block	*sb,
+	u8			*buf,
+	u32			*len,
+	u64			*offset)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	if (*len >= sizeof(uuid_t)) {
+		*offset = offsetof(struct xfs_dsb, sb_uuid);
+		*len = sizeof(uuid_t);
+		memcpy(buf, &mp->m_sb.sb_uuid, sizeof(uuid_t));
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Translate an XFS extent record into the generic iomap description.
+ *
+ * Holes and delayed allocations get a block number of -1 and the matching
+ * IOMAP_HOLE / IOMAP_DELALLOC type.  Any other extent gets the block number
+ * computed by xfs_fsb_to_db() and is typed IOMAP_UNWRITTEN or IOMAP_MAPPED
+ * depending on br_state.  Offset and length are converted from filesystem
+ * blocks to bytes in every case.
+ */
+static void
+xfs_bmbt_to_iomap(
+	struct xfs_inode	*ip,
+	struct iomap		*iomap,
+	struct xfs_bmbt_irec	*imap)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/* The byte range is derived the same way for every extent type. */
+	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+
+	if (imap->br_startblock == HOLESTARTBLOCK ||
+	    imap->br_startblock == DELAYSTARTBLOCK) {
+		/* Neither kind of extent has a block number to report. */
+		iomap->blkno = -1;
+		iomap->type = (imap->br_startblock == HOLESTARTBLOCK) ?
+				IOMAP_HOLE : IOMAP_DELALLOC;
+		return;
+	}
+
+	iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+	iomap->type = (imap->br_state == XFS_EXT_UNWRITTEN) ?
+			IOMAP_UNWRITTEN : IOMAP_MAPPED;
+}
+
+/*
+ * Get a layout for the pNFS client.
+ *
+ * Looks up (and, for write layouts, allocates if necessary) a single extent
+ * covering the start of the byte range described by @offset and @length,
+ * and describes it in @iomap.  On success *@device_generation is set to the
+ * current mp->m_generation value.
+ *
+ * The range is clamped to a limit of s_maxbytes; for read layouts the limit
+ * is the larger of s_maxbytes and the block-rounded inode size.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EIO	the filesystem has been shut down
+ *   -ENXIO	the inode is a realtime inode
+ *   -EINVAL	@offset lies beyond the limit
+ * or the error returned by flushing/invalidating the page cache, reading
+ * the extent map, allocating blocks or updating the prealloc flags.
+ *
+ * The IOLOCK is held exclusively for the duration of the operation and is
+ * dropped again on every return path taken after it was acquired.
+ */
+int
+xfs_fs_map_blocks(
+	struct inode		*inode,
+	loff_t			offset,
+	u64			length,
+	struct iomap		*iomap,
+	bool			write,
+	u32			*device_generation)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	loff_t			limit;
+	int			bmapi_flags = XFS_BMAPI_ENTIRE;
+	int			nimaps = 1;
+	uint			lock_flags;
+	int			error = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+	if (XFS_IS_REALTIME_INODE(ip))
+		return -ENXIO;
+
+	/*
+	 * Lock out any other I/O before we flush and invalidate the pagecache,
+	 * and then hand out a layout to the remote system.  This is very
+	 * similar to direct I/O, except that the synchronization is much more
+	 * complicated.  See the comment near xfs_break_layouts for a detailed
+	 * explanation.
+	 */
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	error = -EINVAL;
+	limit = mp->m_super->s_maxbytes;
+	if (!write)
+		limit = max(limit, round_up(i_size_read(inode),
+				     inode->i_sb->s_blocksize));
+	if (offset > limit)
+		goto out_unlock;
+	if (offset > limit - length)
+		length = limit - offset;
+
+	error = filemap_write_and_wait(inode->i_mapping);
+	if (error)
+		goto out_unlock;
+	error = invalidate_inode_pages2(inode->i_mapping);
+	/*
+	 * A failed invalidation must still drop the IOLOCK taken above;
+	 * returning directly here would leave the inode locked forever.
+	 */
+	if (WARN_ON_ONCE(error))
+		goto out_unlock;
+
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+	lock_flags = xfs_ilock_data_map_shared(ip);
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+				&imap, &nimaps, bmapi_flags);
+	xfs_iunlock(ip, lock_flags);
+
+	if (error)
+		goto out_unlock;
+
+	if (write) {
+		enum xfs_prealloc_flags	flags = 0;
+
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+
+		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
+			error = xfs_iomap_write_direct(ip, offset, length,
+						       &imap, nimaps);
+			if (error)
+				goto out_unlock;
+
+			/*
+			 * Ensure the next transaction is committed
+			 * synchronously so that the blocks allocated and
+			 * handed out to the client are guaranteed to be
+			 * present even after a server crash.
+			 */
+			flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC;
+		}
+
+		error = xfs_update_prealloc_flags(ip, flags);
+		if (error)
+			goto out_unlock;
+	}
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+
+	xfs_bmbt_to_iomap(ip, iomap, &imap);
+	*device_generation = mp->m_generation;
+	return error;
+out_unlock:
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return error;
+}
+
+/*
+ * Make sure the blocks described by maps are stable on disk.  This includes
+ * converting any unwritten extents, flushing the disk cache and updating the
+ * time stamps.
+ *
+ * Note that we rely on the caller to always send us a timestamp update so that
+ * we always commit a transaction here.  If that stops being true we will have
+ * to manually flush the cache here similar to what the fsync code path does
+ * for datasyncs on files that have no dirty metadata.
+ *
+ * @inode:	inode the layout was handed out for
+ * @maps:	array of byte ranges the client reports as written
+ * @nr_maps:	number of entries in @maps
+ * @iattr:	timestamp update and, with ATTR_SIZE set, a new file size;
+ *		the size is only ever extended, never reduced
+ *
+ * Each map is clipped to the larger of the current inode size and the
+ * requested new size; maps that start beyond it or end up empty are skipped.
+ *
+ * Returns 0 on success or the negative errno from unwritten extent
+ * conversion, transaction reservation or transaction commit.
+ */
+int
+xfs_fs_commit_blocks(
+	struct inode		*inode,
+	struct iomap		*maps,
+	int			nr_maps,
+	struct iattr		*iattr)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error, i;
+	loff_t			size;
+
+	ASSERT(iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME));
+
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	size = i_size_read(inode);
+	if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size > size)
+		size = iattr->ia_size;
+
+	for (i = 0; i < nr_maps; i++) {
+		u64 start, length, end;
+
+		start = maps[i].offset;
+		if (start > size)
+			continue;
+
+		end = start + maps[i].length;
+		if (end > size)
+			end = size;
+
+		length = end - start;
+		if (!length)
+			continue;
+
+		/*
+		 * Make sure reads through the pagecache see the new data.
+		 * A failure is only warned about and does not abort the
+		 * commit.
+		 */
+		error = invalidate_inode_pages2_range(inode->i_mapping,
+					start >> PAGE_CACHE_SHIFT,
+					(end - 1) >> PAGE_CACHE_SHIFT);
+		WARN_ON_ONCE(error);
+
+		error = xfs_iomap_write_unwritten(ip, start, length);
+		if (error)
+			goto out_drop_iolock;
+	}
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+	if (error) {
+		/*
+		 * The transaction was allocated but never committed, so it
+		 * has to be cancelled here or it is leaked.
+		 */
+		xfs_trans_cancel(tp, 0);
+		goto out_drop_iolock;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	xfs_setattr_time(ip, iattr);
+	if (iattr->ia_valid & ATTR_SIZE) {
+		/* Only ever grow the file here, never shrink it. */
+		if (iattr->ia_size > i_size_read(inode)) {
+			i_size_write(inode, iattr->ia_size);
+			ip->i_d.di_size = iattr->ia_size;
+		}
+	}
+
+	xfs_trans_set_sync(tp);
+	error = xfs_trans_commit(tp, 0);
+
+out_drop_iolock:
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return error;
+}
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h
new file mode 100644
index 0000000..0d91255
--- /dev/null
+++ b/fs/xfs/xfs_pnfs.h
@@ -0,0 +1,11 @@
+#ifndef _XFS_PNFS_H
+#define _XFS_PNFS_H 1
+
+#ifdef CONFIG_NFSD_PNFS
+int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
+int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
+ struct iomap *iomap, bool write, u32 *device_generation);
+int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
+ struct iattr *iattr);
+#endif /* CONFIG_NFSD_PNFS */
+#endif /* _XFS_PNFS_H */
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2015-01-22 11:10 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-01-22 11:09 a simple and scalable pNFS block layout server V2 Christoph Hellwig
2015-01-22 11:09 ` [PATCH 04/20] nfsd: factor out a helper to decode nfstime4 values Christoph Hellwig
[not found] ` <1421925006-24231-5-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-01-22 20:15 ` J. Bruce Fields
2015-01-22 11:09 ` [PATCH 05/20] nfsd: move nfsd_fh_match to nfsfh.h Christoph Hellwig
[not found] ` <1421925006-24231-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-01-22 11:09 ` [PATCH 01/20] nfs: add LAYOUT_TYPE_MAX enum value Christoph Hellwig
2015-01-22 11:09 ` [PATCH 02/20] fs: track fl_owner for leases Christoph Hellwig
2015-01-22 11:09 ` [PATCH 03/20] fs: add FL_LAYOUT lease type Christoph Hellwig
2015-01-22 15:45 ` Jeff Layton
[not found] ` <1421925006-24231-4-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-01-22 20:14 ` J. Bruce Fields
[not found] ` <20150122201442.GJ898-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
2015-01-22 20:18 ` Christoph Hellwig
2015-01-22 11:09 ` [PATCH 06/20] nfsd: add fh_fsid_match helper Christoph Hellwig
2015-01-22 11:09 ` [PATCH 07/20] nfsd: make lookup/alloc/unhash_stid available outside nfs4state.c Christoph Hellwig
2015-01-22 11:09 ` [PATCH 08/20] nfsd: make find/get/put file " Christoph Hellwig
2015-01-22 11:09 ` [PATCH 10/20] nfsd: implement pNFS operations Christoph Hellwig
[not found] ` <1421925006-24231-11-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-01-29 20:33 ` J. Bruce Fields
[not found] ` <20150129203346.GA11064-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
2015-02-02 12:43 ` Christoph Hellwig
2015-02-02 14:28 ` J. Bruce Fields
[not found] ` <20150202142832.GC22301-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
2015-02-02 14:56 ` Christoph Hellwig
[not found] ` <20150202145619.GA18387-jcswGhMUV9g@public.gmane.org>
2015-02-02 15:00 ` J. Bruce Fields
[not found] ` <20150202150032.GD22301-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
2015-02-02 18:56 ` Christoph Hellwig
[not found] ` <20150202185638.GB23319-jcswGhMUV9g@public.gmane.org>
2015-02-03 16:08 ` J. Bruce Fields
2015-01-22 11:09 ` [PATCH 11/20] nfsd: implement pNFS layout recalls Christoph Hellwig
2015-01-22 11:10 ` [PATCH 14/20] exportfs: add methods for block layout exports Christoph Hellwig
2015-01-22 11:10 ` [PATCH 16/20] xfs: pass a 64-bit count argument to xfs_iomap_write_unwritten Christoph Hellwig
2015-01-29 20:52 ` J. Bruce Fields
2015-02-02 7:30 ` Christoph Hellwig
2015-02-02 19:24 ` Dave Chinner
2015-02-02 19:43 ` Dave Chinner
2015-02-02 19:48 ` J. Bruce Fields
2015-02-03 18:35 ` Christoph Hellwig
[not found] ` <20150203183533.GA16929-jcswGhMUV9g@public.gmane.org>
2015-02-11 22:35 ` J. Bruce Fields
2015-02-11 22:54 ` J. Bruce Fields
2015-02-04 7:57 ` Christoph Hellwig
[not found] ` <20150204075756.GA763-jcswGhMUV9g@public.gmane.org>
2015-02-04 20:02 ` Dave Chinner
2015-01-22 11:10 ` [PATCH 17/20] xfs: update the superblock using a synchronous transaction in growfs Christoph Hellwig
2015-01-22 11:10 ` Christoph Hellwig [this message]
[not found] ` <1421925006-24231-20-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-02-05 0:47 ` [PATCH 19/20] xfs: implement pNFS export operations Dave Chinner
2015-02-05 7:08 ` Christoph Hellwig
[not found] ` <20150205070858.GA593-jcswGhMUV9g@public.gmane.org>
2015-02-05 13:57 ` Christoph Hellwig
[not found] ` <20150205135756.GA6386-jcswGhMUV9g@public.gmane.org>
2015-02-06 22:20 ` Dave Chinner
2015-02-06 22:42 ` J. Bruce Fields
2015-02-08 13:34 ` Christoph Hellwig
[not found] ` <20150208133435.GA27081-jcswGhMUV9g@public.gmane.org>
2015-02-08 14:09 ` Jeff Layton
[not found] ` <20150208090942.51e99687-9yPaYZwiELC+kQycOl6kW4xkIHaj4LzF@public.gmane.org>
2015-02-09 20:11 ` J. Bruce Fields
[not found] ` <20150209201154.GA27746-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
2015-02-10 0:04 ` Dave Chinner
2015-02-13 1:11 ` J. Bruce Fields
2015-02-13 1:54 ` Dave Chinner
2015-02-13 2:38 ` Stephen Rothwell
2015-02-15 23:25 ` Dave Chinner
2015-01-22 11:10 ` [PATCH 20/20] xfs: recall pNFS layouts on conflicting access Christoph Hellwig
[not found] ` <1421925006-24231-21-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-02-05 0:51 ` Dave Chinner
2015-01-22 20:01 ` a simple and scalable pNFS block layout server V2 J. Bruce Fields
2015-01-22 20:06 ` J. Bruce Fields
2015-01-22 20:20 ` Christoph Hellwig
[not found] ` <20150122200618.GI898-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
2015-01-22 20:20 ` Jeff Layton
2015-01-22 11:09 ` [PATCH 09/20] nfsd: make find_any_file available outside nfs4state.c Christoph Hellwig
2015-01-22 11:09 ` [PATCH 12/20] nfsd: update documentation for pNFS support Christoph Hellwig
2015-01-22 11:09 ` [PATCH 13/20] nfsd: add trace events Christoph Hellwig
2015-01-22 11:10 ` [PATCH 15/20] nfsd: pNFS block layout driver Christoph Hellwig
2015-01-22 11:10 ` [PATCH 18/20] xfs: factor out a xfs_update_prealloc_flags() helper Christoph Hellwig
[not found] ` <1421925006-24231-19-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-02-01 23:06 ` Dave Chinner
2015-01-22 16:04 ` a simple and scalable pNFS block layout server V2 Chuck Lever
2015-01-22 16:21 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1421925006-24231-20-git-send-email-hch@lst.de \
--to=hch-jcswghmuv9g@public.gmane.org \
--cc=bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org \
--cc=jlayton-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org \
--cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=xfs-VZNHf3L845pBDgjK7y7TUQ@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).