From: Richard Wareing <rwareing@fb.com>
To: linux-xfs@vger.kernel.org
Cc: david@fromorbit.com, darrick.wong@oracle.com, hch@infradead.org
Subject: [PATCH v5 2/3] xfs: Set realtime flag based on initial allocation size
Date: Mon, 25 Sep 2017 12:44:17 -0700 [thread overview]
Message-ID: <20170925194418.720146-3-rwareing@fb.com> (raw)
In-Reply-To: <20170925194418.720146-1-rwareing@fb.com>
- The rt_alloc_min sysfs option automatically selects the device (data
device, or realtime) based on the size of the initial allocation of the
file.
- This option can be used to route the storage of small files (and the
inefficient workloads associated with them) to a suitable storage
device such as an SSD, while larger allocations are sent to a traditional
HDD.
- Supports writes via O_DIRECT, buffered (i.e. page cache), and
pre-allocations (i.e. fallocate)
- Available only when the kernel is compiled with the CONFIG_XFS_RT option.
Signed-off-by: Richard Wareing <rwareing@fb.com>
---
Changes since v4:
* Added xfs_inode_select_target function to hold target selection
code
* XFS_IS_REALTIME_MOUNT check now moved inside xfs_inode_select_target
function for better gating
* Improved consistency in the sysfs set behavior
* Style fixes
Changes since v3:
* Now functions via initial allocation regardless of O_DIRECT, buffered or
pre-allocation code paths. Provides a consistent user-experience.
* I did do some experiments putting this in the xfs_bmapi_write code path;
however, pre-allocation accounting unfortunately prevents this cleaner
approach. As such, this proved to be the cleanest functional approach.
* No longer a mount option, now a sysfs tunable
fs/xfs/xfs_bmap_util.c | 2 ++
fs/xfs/xfs_inode.c | 18 ++++++++++++------
fs/xfs/xfs_iomap.c | 5 +++++
fs/xfs/xfs_mount.h | 1 +
fs/xfs/xfs_rtalloc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_rtalloc.h | 2 ++
fs/xfs/xfs_sysfs.c | 38 ++++++++++++++++++++++++++++++++++++++
7 files changed, 110 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 9e3cc21..8205669d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1026,6 +1026,8 @@ xfs_alloc_file_space(
if (len <= 0)
return -EINVAL;
+ xfs_inode_select_target(ip, len);
+
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c..f9e2deb 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1620,12 +1620,18 @@ xfs_itruncate_extents(
if (error)
goto out;
- /*
- * Clear the reflink flag if we truncated everything.
- */
- if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
- ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
- xfs_inode_clear_cowblocks_tag(ip);
+ if (ip->i_d.di_nblocks == 0) {
+ /*
+ * Clear the reflink flag if we truncated everything.
+ */
+ if (xfs_is_reflink_inode(ip)) {
+ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ xfs_inode_clear_cowblocks_tag(ip);
+ }
+ /* Clear realtime flag if m_rt_alloc_min policy is in place */
+ if (XFS_IS_REALTIME_MOUNT(mp) && mp->m_rt_alloc_min) {
+ ip->i_d.di_flags &= ~XFS_DIFLAG_REALTIME;
+ }
}
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf..b3c3b9b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -40,6 +40,7 @@
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"
+#include "xfs_rtalloc.h"
#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
@@ -174,6 +175,8 @@ xfs_iomap_write_direct(
int bmapi_flags = XFS_BMAPI_PREALLOC;
uint tflags = 0;
+ xfs_inode_select_target(ip, count);
+
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
lockmode = XFS_ILOCK_SHARED; /* locked by caller */
@@ -981,6 +984,8 @@ xfs_file_iomap_begin(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
+ xfs_inode_select_target(ip, length);
+
if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9fa312a..2adc701 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -197,6 +197,7 @@ typedef struct xfs_mount {
__uint32_t m_generation;
bool m_fail_unmount;
+ uint m_rt_alloc_min; /* Min RT allocation */
#ifdef DEBUG
/*
* DEBUG mode instrumentation to test and/or trigger delayed allocation
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c57aa7f..421f860 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1284,3 +1284,53 @@ xfs_rtpick_extent(
*pick = b;
return 0;
}
+
+/*
+ * Apply the rt_alloc_min policy to an inode: an allocation of at least
+ * m_rt_alloc_min bytes sets the realtime flag, a smaller one clears it.
+ * A threshold of zero leaves the inode flags untouched.
+ */
+void xfs_rt_alloc_min(
+	struct xfs_mount	*mp,
+	struct xfs_inode	*ip,
+	xfs_off_t		len)
+{
+	uint			threshold = mp->m_rt_alloc_min;
+
+	/* Policy not configured: nothing to decide. */
+	if (threshold == 0)
+		return;
+
+	if (len >= threshold)
+		ip->i_d.di_flags |= XFS_DIFLAG_REALTIME;
+	else
+		ip->i_d.di_flags &= ~XFS_DIFLAG_REALTIME;
+}
+
+/*
+ * Choose the target device for an inode from the length of its initial
+ * allocation.  The realtime flag is only touched on realtime mounts and
+ * only while the inode has no extents and a zero size; the decision
+ * itself is made by xfs_rt_alloc_min().
+ */
+void xfs_inode_select_target(
+	struct xfs_inode	*ip,
+	xfs_off_t		len)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/* Without a realtime device there is nothing to choose between. */
+	if (!XFS_IS_REALTIME_MOUNT(mp))
+		return;
+
+	/*
+	 * The target cannot change once blocks have been allocated (e.g.
+	 * fallocate() beyond EOF) or data has been written.
+	 */
+	if (ip->i_d.di_nextents || ip->i_d.di_size)
+		return;
+
+	/* No-op when the m_rt_alloc_min threshold is unset (zero). */
+	xfs_rt_alloc_min(mp, ip, len);
+}
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index f13133e..eaf7ed3 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -136,6 +136,7 @@ int xfs_rtalloc_query_range(struct xfs_trans *tp,
int xfs_rtalloc_query_all(struct xfs_trans *tp,
xfs_rtalloc_query_range_fn fn,
void *priv);
+void xfs_inode_select_target(struct xfs_inode *ip, xfs_off_t len);
#else
# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
@@ -155,6 +156,7 @@ xfs_rtmount_init(
}
# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
# define xfs_rtunmount_inodes(m)
+# define xfs_inode_select_target(i,l)
#endif /* CONFIG_XFS_RT */
#endif /* __XFS_RTALLOC_H__ */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 80ac15f..1e202a1 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -129,10 +129,48 @@ XFS_SYSFS_ATTR_RW(drop_writes);
#endif /* DEBUG */
+/*
+ * sysfs write handler for rt_alloc_min.  Parses the buffer as an integer
+ * and stores it in mp->m_rt_alloc_min.  Returns the kstrtoint() error for
+ * unparsable input, -EINVAL on a mount without a realtime device or for a
+ * negative value, and otherwise the number of bytes consumed.
+ */
+STATIC ssize_t
+rt_alloc_min_store(
+	struct kobject		*kobject,
+	const char		*buf,
+	size_t			count)
+{
+	struct xfs_mount	*mp = to_mp(kobject);
+	int			threshold;
+	int			error;
+
+	error = kstrtoint(buf, 0, &threshold);
+	if (error)
+		return error;
+
+	/* Only valid if using a real-time device */
+	if (!XFS_IS_REALTIME_MOUNT(mp))
+		return -EINVAL;
+
+	/* Negative thresholds are rejected; zero and above are stored as-is. */
+	if (threshold < 0)
+		return -EINVAL;
+
+	mp->m_rt_alloc_min = threshold;
+	return count;
+}
+
+/*
+ * sysfs read handler for rt_alloc_min: formats the current value of
+ * mp->m_rt_alloc_min into buf as a decimal number followed by a newline
+ * and returns the snprintf() result.
+ */
+STATIC ssize_t
+rt_alloc_min_show(
+	struct kobject		*kobject,
+	char			*buf)
+{
+	struct xfs_mount	*mp = to_mp(kobject);
+
+	/* m_rt_alloc_min is declared uint, so print it with %u, not %d. */
+	return snprintf(buf, PAGE_SIZE, "%u\n", mp->m_rt_alloc_min);
+}
+XFS_SYSFS_ATTR_RW(rt_alloc_min);
+
static struct attribute *xfs_mp_attrs[] = {
#ifdef DEBUG
ATTR_LIST(drop_writes),
#endif
+ ATTR_LIST(rt_alloc_min),
NULL,
};
--
2.9.5
next prev parent reply other threads:[~2017-09-25 19:44 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-25 19:44 [PATCH v5 0/3] XFS realtime device tweaks Richard Wareing
2017-09-25 19:44 ` [PATCH v5 1/3] xfs: Show realtime device stats on statfs calls if inherit flag set Richard Wareing
2017-09-25 22:53 ` Eric Sandeen
2017-09-26 3:32 ` Richard Wareing
2017-09-25 22:55 ` Darrick J. Wong
2017-09-25 19:44 ` Richard Wareing [this message]
2017-09-25 22:47 ` [PATCH v5 2/3] xfs: Set realtime flag based on initial allocation size Darrick J. Wong
2017-09-26 5:25 ` Dave Chinner
2017-09-26 6:11 ` Richard Wareing
2017-09-26 0:13 ` Eric Sandeen
2017-09-26 5:17 ` Richard Wareing
2017-09-25 19:44 ` [PATCH v5 3/3] xfs: Add realtime fallback if data device full Richard Wareing
2017-09-25 22:52 ` Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170925194418.720146-3-rwareing@fb.com \
--to=rwareing@fb.com \
--cc=darrick.wong@oracle.com \
--cc=david@fromorbit.com \
--cc=hch@infradead.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox