From: Goffredo Baroncelli <kreijack@libero.it>
To: linux-btrfs@vger.kernel.org
Cc: Goffredo Baroncelli <kreijack@inwind.it>
Subject: [PATCH] btrfs: add ssd_metadata mode
Date: Wed, 1 Apr 2020 22:03:16 +0200 [thread overview]
Message-ID: <20200401200316.9917-2-kreijack@libero.it> (raw)
In-Reply-To: <20200401200316.9917-1-kreijack@libero.it>
From: Goffredo Baroncelli <kreijack@inwind.it>
When this mode is enabled, the chunk allocation policy
is modified as follows:
- When a metadata chunk is allocated, priority is given to
SSD disks.
- When a data chunk is allocated, priority is given to a
rotational disk.
When a striped profile is involved (like RAID0,5,6), the logic
is a bit more complex. If there are enough disks, the data profiles
are stored on the rotational disks only; the metadata profiles
are stored on the non rotational disk only.
If there are not enough disks of the preferred kind, then the profile
is stored on all the disks.
Example: assuming that sda, sdb, sdc are ssd disks, and sde, sdf are
rotational ones.
A data profile raid5, will be stored on sda, sdb, sdc, sde, sdf (sde
and sdf are not enough to host a raid5 profile).
A metadata profile raid5, will be stored on sda, sdb, sdc (these
are enough to host a raid5 profile).
To enable this mode pass -o ssd_metadata at mount time.
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
---
fs/btrfs/ctree.h | 1 +
fs/btrfs/super.c | 8 +++++
fs/btrfs/volumes.c | 89 ++++++++++++++++++++++++++++++++++++++++++++--
fs/btrfs/volumes.h | 1 +
4 files changed, 97 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2e9f938508e9..0f3c09cc4863 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1187,6 +1187,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
+#define BTRFS_MOUNT_SSD_METADATA (1 << 29)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c6557d44907a..d0a5cf496f90 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -346,6 +346,7 @@ enum {
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
Opt_ref_verify,
#endif
+ Opt_ssd_metadata,
Opt_err,
};
@@ -416,6 +417,7 @@ static const match_table_t tokens = {
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
{Opt_ref_verify, "ref_verify"},
#endif
+ {Opt_ssd_metadata, "ssd_metadata"},
{Opt_err, NULL},
};
@@ -853,6 +855,10 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, REF_VERIFY);
break;
#endif
+ case Opt_ssd_metadata:
+ btrfs_set_and_info(info, SSD_METADATA,
+ "enabling ssd_metadata");
+ break;
case Opt_err:
btrfs_info(info, "unrecognized mount option '%s'", p);
ret = -EINVAL;
@@ -1369,6 +1375,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
#endif
if (btrfs_test_opt(info, REF_VERIFY))
seq_puts(seq, ",ref_verify");
+ if (btrfs_test_opt(info, SSD_METADATA))
+ seq_puts(seq, ",ssd_metadata");
seq_printf(seq, ",subvolid=%llu",
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
seq_puts(seq, ",subvol=");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a8b71ded4d21..678dc3366711 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4758,6 +4758,58 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
return 0;
}
+/*
+ * sort() comparator for metadata (non-data) chunks: non-rotational devices
+ * first, then larger max_avail first, then larger total_avail first.
+ */
+static int btrfs_cmp_device_info_metadata(const void *a, const void *b)
+{
+ const struct btrfs_device_info *di_a = a;
+ const struct btrfs_device_info *di_b = b;
+
+ /* metadata -> a non-rotational device sorts before a rotational one */
+ if (!di_a->rotational && di_b->rotational)
+ return -1;
+ if (di_a->rotational && !di_b->rotational)
+ return 1;
+ if (di_a->max_avail > di_b->max_avail)
+ return -1;
+ if (di_a->max_avail < di_b->max_avail)
+ return 1;
+ if (di_a->total_avail > di_b->total_avail)
+ return -1;
+ if (di_a->total_avail < di_b->total_avail)
+ return 1;
+ return 0;
+}
+
+/*
+ * sort() comparator for data chunks: rotational devices first, then
+ * larger max_avail first, then larger total_avail first.
+ */
+static int btrfs_cmp_device_info_data(const void *a, const void *b)
+{
+ const struct btrfs_device_info *di_a = a;
+ const struct btrfs_device_info *di_b = b;
+
+ /* data -> a non-rotational device sorts after a rotational one */
+ if (!di_a->rotational && di_b->rotational)
+ return 1;
+ if (di_a->rotational && !di_b->rotational)
+ return -1;
+ if (di_a->max_avail > di_b->max_avail)
+ return -1;
+ if (di_a->max_avail < di_b->max_avail)
+ return 1;
+ if (di_a->total_avail > di_b->total_avail)
+ return -1;
+ if (di_a->total_avail < di_b->total_avail)
+ return 1;
+ return 0;
+}
+
+
+
static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
{
if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
@@ -4805,6 +4857,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
int i;
int j;
int index;
+ int nr_rotational;
BUG_ON(!alloc_profile_is_valid(type, 0));
@@ -4860,6 +4913,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
* about the available holes on each device.
*/
ndevs = 0;
+ nr_rotational = 0;
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
u64 max_avail;
u64 dev_offset;
@@ -4911,14 +4965,45 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
devices_info[ndevs].max_avail = max_avail;
devices_info[ndevs].total_avail = total_avail;
devices_info[ndevs].dev = device;
+ devices_info[ndevs].rotational = !test_bit(QUEUE_FLAG_NONROT,
+ &(bdev_get_queue(device->bdev)->queue_flags));
+ if (devices_info[ndevs].rotational)
+ nr_rotational++;
++ndevs;
}
+ BUG_ON(nr_rotational > ndevs);
/*
* now sort the devices by hole size / available space
*/
- sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
- btrfs_cmp_device_info, NULL);
+ if (((type & BTRFS_BLOCK_GROUP_DATA) &&
+ (type & BTRFS_BLOCK_GROUP_METADATA)) ||
+ !btrfs_test_opt(info, SSD_METADATA)) {
+ /* mixed bg or SSD_METADATA not set */
+ sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_info, NULL);
+ } else {
+ /*
+ * if SSD_METADATA is set, sort the device considering also the
+ * kind (ssd or not). Limit the availables devices to the ones
+ * of the same kind, to avoid that a striped profile like raid5
+ * spans to all kind of devices (ssd and rotational).
+ * It is allowed to span different kind of devices if the ones of
+ * the same kind are not enough alone.
+ */
+ if (type & BTRFS_BLOCK_GROUP_DATA) {
+ sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_info_data, NULL);
+ if (nr_rotational > devs_min)
+ ndevs = nr_rotational;
+ } else {
+ int nr_norot = ndevs - nr_rotational;
+ sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_info_metadata, NULL);
+ if (nr_norot > devs_min)
+ ndevs = nr_norot;
+ }
+ }
/*
* Round down to number of usable stripes, devs_increment can be any
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index fc1b564b9cfe..bc1cfa0c27ea 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -340,6 +340,7 @@ struct btrfs_device_info {
u64 dev_offset;
u64 max_avail;
u64 total_avail;
+ int rotational:1;
};
struct btrfs_raid_attr {
--
2.26.0
next prev parent reply other threads:[~2020-04-01 20:03 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-01 20:03 [RFC] btrfs: ssd_metadata: storing metadata on SSD Goffredo Baroncelli
2020-04-01 20:03 ` Goffredo Baroncelli [this message]
2020-04-01 20:53 ` [PATCH] btrfs: add ssd_metadata mode Goffredo Baroncelli
2020-04-02 9:33 ` Steven Davies
2020-04-02 16:39 ` Goffredo Baroncelli
2020-04-03 8:43 ` Michael
2020-04-03 10:08 ` Steven Davies
2020-04-03 16:19 ` Goffredo Baroncelli
2020-04-03 16:28 ` Hugo Mills
2020-04-03 16:36 ` Hans van Kranenburg
2020-04-02 18:01 ` Martin Svec
-- strict thread matches above, loose matches on Subject: below --
2020-04-05 7:19 [RFC][PATCH v2] btrfs: ssd_metadata: storing metadata on SSD Goffredo Baroncelli
2020-04-05 7:19 ` [PATCH] btrfs: add ssd_metadata mode Goffredo Baroncelli
2020-04-05 8:26 [RFC][PATCH V3] btrfs: ssd_metadata: storing metadata on SSD Goffredo Baroncelli
2020-04-05 8:26 ` [PATCH] btrfs: add ssd_metadata mode Goffredo Baroncelli
2020-04-14 5:24 ` Paul Jones
2020-10-23 7:23 ` Wang Yugui
2020-10-23 10:11 ` Adam Borowski
2020-10-23 11:25 ` Qu Wenruo
2020-10-23 12:37 ` Wang Yugui
2020-10-23 12:45 ` Qu Wenruo
2020-10-23 13:10 ` Steven Davies
2020-10-23 13:49 ` Wang Yugui
2020-10-23 18:03 ` Goffredo Baroncelli
2020-10-24 3:26 ` Paul Jones
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200401200316.9917-2-kreijack@libero.it \
--to=kreijack@libero.it \
--cc=kreijack@inwind.it \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).