From: Richard Wareing <rwareing@fb.com>
To: linux-xfs@vger.kernel.org
Cc: david@fromorbit.com, darrick.wong@oracle.com, hch@infradead.org
Subject: [PATCH v4 2/3] xfs: Set realtime flag based on initial allocation size
Date: Mon, 18 Sep 2017 20:52:37 -0700 [thread overview]
Message-ID: <20170919035238.3976871-3-rwareing@fb.com> (raw)
In-Reply-To: <20170919035238.3976871-1-rwareing@fb.com>
- The rt_alloc_min sysfs option automatically selects the device (data
device, or realtime) based on the size of the initial allocation of the
file.
- This option can be used to route the storage of small files (and the
inefficient workloads associated with them) to a suitable storage
device such a SSD, while larger allocations are sent to a traditional
HDD.
- Supports writes via O_DIRECT, buffered (i.e. page cache), and
pre-allocations (i.e. fallocate)
- Available only when kernel is compiled w/ CONFIG_XFS_RT option.
Signed-off-by: Richard Wareing <rwareing@fb.com>
---
Changes since v3:
* Now functions via initial allocation regardless of O_DIRECT, buffered or
pre-allocation code paths. Provides a consistent user-experience.
* I Did do some experiments putting this in the xfs_bmapi_write code path
however pre-allocation accounting unfortunately prevents this cleaner
approach. As such, this proved to be the cleanest and functional approach.
* No longer a mount option, now a sysfs tunable
Changes since v2:
* None
fs/xfs/xfs_bmap_util.c | 3 +++
fs/xfs/xfs_inode.c | 18 ++++++++++++------
fs/xfs/xfs_iomap.c | 8 ++++++++
fs/xfs/xfs_mount.h | 1 +
fs/xfs/xfs_rtalloc.c | 26 ++++++++++++++++++++++++++
fs/xfs/xfs_rtalloc.h | 4 ++++
fs/xfs/xfs_sysfs.c | 41 +++++++++++++++++++++++++++++++++++++++++
7 files changed, 95 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 9e3cc21..2d253fb 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1026,6 +1026,9 @@ xfs_alloc_file_space(
if (len <= 0)
return -EINVAL;
+ if (XFS_IS_REALTIME_MOUNT(mp))
+ xfs_rt_alloc_min(ip, len);
+
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c..f9e2deb 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1620,12 +1620,18 @@ xfs_itruncate_extents(
if (error)
goto out;
- /*
- * Clear the reflink flag if we truncated everything.
- */
- if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
- ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
- xfs_inode_clear_cowblocks_tag(ip);
+ if (ip->i_d.di_nblocks == 0) {
+ /*
+ * Clear the reflink flag if we truncated everything.
+ */
+ if (xfs_is_reflink_inode(ip)) {
+ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ xfs_inode_clear_cowblocks_tag(ip);
+ }
+ /* Clear realtime flag if m_rt_alloc_min policy is in place */
+ if (XFS_IS_REALTIME_MOUNT(mp) && mp->m_rt_alloc_min) {
+ ip->i_d.di_flags &= ~XFS_DIFLAG_REALTIME;
+ }
}
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf..11f1c95 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -40,6 +40,7 @@
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"
+#include "xfs_rtalloc.h"
#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
@@ -174,6 +175,10 @@ xfs_iomap_write_direct(
int bmapi_flags = XFS_BMAPI_PREALLOC;
uint tflags = 0;
+
+ if (XFS_IS_REALTIME_MOUNT(mp))
+ xfs_rt_alloc_min(ip, count);
+
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
lockmode = XFS_ILOCK_SHARED; /* locked by caller */
@@ -981,6 +986,9 @@ xfs_file_iomap_begin(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
+ if (XFS_IS_REALTIME_MOUNT(mp))
+ xfs_rt_alloc_min(ip, length);
+
if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9fa312a..067be3b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -197,6 +197,7 @@ typedef struct xfs_mount {
__uint32_t m_generation;
bool m_fail_unmount;
+ uint m_rt_alloc_min; /* Min RT allocation */
#ifdef DEBUG
/*
* DEBUG mode instrumentation to test and/or trigger delayed allocation
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c57aa7f..e51cb25 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1284,3 +1284,29 @@ xfs_rtpick_extent(
*pick = b;
return 0;
}
+
+/*
+ * Automatically set real-time flag if initial write to inode is > m_rt_alloc_min
+ *
+ * Also valid on truncations.
+ *
+ */
+void xfs_rt_alloc_min(
+ struct xfs_inode *ip,
+ xfs_off_t len)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (!mp->m_rt_alloc_min || ip->i_d.di_size)
+ return;
+
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ if (len < mp->m_rt_alloc_min) {
+ ip->i_d.di_flags &= ~XFS_DIFLAG_REALTIME;
+ }
+ } else {
+ if (len >= mp->m_rt_alloc_min) {
+ ip->i_d.di_flags |= XFS_DIFLAG_REALTIME;
+ }
+ }
+}
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index f13133e..12939d9 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -136,6 +136,9 @@ int xfs_rtalloc_query_range(struct xfs_trans *tp,
int xfs_rtalloc_query_all(struct xfs_trans *tp,
xfs_rtalloc_query_range_fn fn,
void *priv);
+void xfs_rt_alloc_min(struct xfs_inode *ip, xfs_off_t len);
+
+
#else
# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
@@ -155,6 +158,7 @@ xfs_rtmount_init(
}
# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
# define xfs_rtunmount_inodes(m)
+# define xfs_rt_alloc_min(i,l) (ENOSYS)
#endif /* CONFIG_XFS_RT */
#endif /* __XFS_RTALLOC_H__ */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 80ac15f..3c8dedb 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -129,10 +129,51 @@ XFS_SYSFS_ATTR_RW(drop_writes);
#endif /* DEBUG */
+#ifdef CONFIG_XFS_RT
+STATIC ssize_t
+rt_alloc_min_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ /* Only valid if using a real-time device */
+ if (XFS_IS_REALTIME_MOUNT(mp) && val > 0)
+ mp->m_rt_alloc_min = val;
+ else if (val <= 0)
+ mp->m_rt_alloc_min = 0;
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+STATIC ssize_t
+rt_alloc_min_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_rt_alloc_min);
+}
+XFS_SYSFS_ATTR_RW(rt_alloc_min);
+#endif /* CONFIG_XFS_RT */
+
static struct attribute *xfs_mp_attrs[] = {
#ifdef DEBUG
ATTR_LIST(drop_writes),
#endif
+#ifdef CONFIG_XFS_RT
+ ATTR_LIST(rt_alloc_min),
+#endif
NULL,
};
--
2.9.5
next prev parent reply other threads:[~2017-09-19 3:52 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-19 3:52 [PATCH v4 0/3] XFS realtime device tweaks Richard Wareing
2017-09-19 3:52 ` [PATCH v4 1/3] xfs: Show realtime device stats on statfs calls if inherit flag set Richard Wareing
2017-09-22 5:35 ` Dave Chinner
2017-09-22 18:26 ` Richard Wareing
2017-09-19 3:52 ` Richard Wareing [this message]
2017-09-22 5:54 ` [PATCH v4 2/3] xfs: Set realtime flag based on initial allocation size Dave Chinner
2017-09-22 19:06 ` Richard Wareing
2017-09-19 3:52 ` [PATCH v4 3/3] xfs: Add realtime fallback if data device full Richard Wareing
2017-09-22 7:04 ` Dave Chinner
2017-09-25 18:37 ` Richard Wareing
2017-09-25 23:16 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170919035238.3976871-3-rwareing@fb.com \
--to=rwareing@fb.com \
--cc=darrick.wong@oracle.com \
--cc=david@fromorbit.com \
--cc=hch@infradead.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.