All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] btrfs: read_policy type device
@ 2022-07-25 15:10 Anand Jain
  2022-07-25 15:10 ` [PATCH 1/2] btrfs: introduce new device-state read_preferred Anand Jain
  2022-07-25 15:10 ` [PATCH 2/2] btrfs: introduce new read_policy device Anand Jain
  0 siblings, 2 replies; 3+ messages in thread
From: Anand Jain @ 2022-07-25 15:10 UTC (permalink / raw)
  To: linux-btrfs

This patch set provides the read_policy type 'device'. It was previously
posted as part of a larger series of read policies [1].

[1]
  Re: [PATCH v4 0/3, full-cover-letter] btrfs: read_policy types latency, device and round-robin
  https://lore.kernel.org/linux-btrfs/20210120123437.OVx7ybGaVfmOdZxtpp43qcB_ORHQQs5OzPSzr3ZUGbo@z/T/

I am sending them separately as they help to test the integrity of the
mirrored RAID devices.

No change from the previous V4 except for the rebase conflict fixes.

Anand Jain (2):
  btrfs: introduce new device-state read_preferred
  btrfs: introduce new read_policy device

 fs/btrfs/sysfs.c   | 55 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/volumes.c | 23 +++++++++++++++++++
 fs/btrfs/volumes.h |  3 +++
 3 files changed, 80 insertions(+), 1 deletion(-)

-- 
2.33.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] btrfs: introduce new device-state read_preferred
  2022-07-25 15:10 [PATCH 0/2] btrfs: read_policy type device Anand Jain
@ 2022-07-25 15:10 ` Anand Jain
  2022-07-25 15:10 ` [PATCH 2/2] btrfs: introduce new read_policy device Anand Jain
  1 sibling, 0 replies; 3+ messages in thread
From: Anand Jain @ 2022-07-25 15:10 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Josef Bacik

This is a preparatory patch that introduces a new device flag,
'read_preferred', which is readable and writable through the sysfs interface.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/sysfs.c   | 53 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h |  1 +
 2 files changed, 54 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index d5d0717fd09a..ca9812cabece 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1785,6 +1785,58 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
 }
 BTRFS_ATTR(devid, error_stats, btrfs_devinfo_error_stats_show);
 
+static ssize_t btrfs_devinfo_read_pref_show(struct kobject *kobj,
+					    struct kobj_attribute *a, char *buf)
+{
+	int val;
+	struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+						   devid_kobj);
+
+	val = !!test_bit(BTRFS_DEV_STATE_READ_PREFERRED, &device->dev_state);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t btrfs_devinfo_read_pref_store(struct kobject *kobj,
+					     struct kobj_attribute *a,
+					     const char *buf, size_t len)
+{
+	int ret;
+	unsigned long val;
+	struct btrfs_device *device;
+
+	ret = kstrtoul(skip_spaces(buf), 0, &val);
+	if (ret)
+		return ret;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	/*
+	 * lock is not required, the btrfs_device struct can't be freed while
+	 * its kobject btrfs_device::devid_kobj is still open.
+	 */
+	device = container_of(kobj, struct btrfs_device, devid_kobj);
+
+	if (val &&
+	    !test_bit(BTRFS_DEV_STATE_READ_PREFERRED, &device->dev_state)) {
+		set_bit(BTRFS_DEV_STATE_READ_PREFERRED, &device->dev_state);
+		btrfs_info(device->fs_devices->fs_info,
+			   "set read preferred on devid %llu (%d)",
+			   device->devid, task_pid_nr(current));
+	} else if (!val &&
+		   test_bit(BTRFS_DEV_STATE_READ_PREFERRED, &device->dev_state)) {
+		clear_bit(BTRFS_DEV_STATE_READ_PREFERRED, &device->dev_state);
+		btrfs_info(device->fs_devices->fs_info,
+			   "reset read preferred on devid %llu (%d)",
+			   device->devid, task_pid_nr(current));
+	}
+
+	return len;
+}
+BTRFS_ATTR_RW(devid, read_preferred, btrfs_devinfo_read_pref_show,
+	      btrfs_devinfo_read_pref_store);
+
 /*
  * Information about one device.
  *
@@ -1798,6 +1850,7 @@ static struct attribute *devid_attrs[] = {
 	BTRFS_ATTR_PTR(devid, replace_target),
 	BTRFS_ATTR_PTR(devid, scrub_speed_max),
 	BTRFS_ATTR_PTR(devid, writeable),
+	BTRFS_ATTR_PTR(devid, read_preferred),
 	NULL
 };
 ATTRIBUTE_GROUPS(devid);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5639961b3626..f04a177136b5 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -85,6 +85,7 @@ struct btrfs_io_geometry {
 #define BTRFS_DEV_STATE_REPLACE_TGT	(3)
 #define BTRFS_DEV_STATE_FLUSH_SENT	(4)
 #define BTRFS_DEV_STATE_NO_READA	(5)
+#define BTRFS_DEV_STATE_READ_PREFERRED	(6)
 
 struct btrfs_zoned_device_info;
 
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] btrfs: introduce new read_policy device
  2022-07-25 15:10 [PATCH 0/2] btrfs: read_policy type device Anand Jain
  2022-07-25 15:10 ` [PATCH 1/2] btrfs: introduce new device-state read_preferred Anand Jain
@ 2022-07-25 15:10 ` Anand Jain
  1 sibling, 0 replies; 3+ messages in thread
From: Anand Jain @ 2022-07-25 15:10 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Josef Bacik

Read-policy type 'device' and device flag 'read-preferred':

The read-policy type device picks the device(s) flagged as
read-preferred for reading stripes of type raid1, raid10,
raid1c3 and raid1c4.

A system might contain SSD, NVMe, iSCSI, or SAN LUN devices, all of which
are non-rotational, so it is not a good idea to set the read-preferred flag
automatically. Instead, the device read-policy along with the read-preferred
flag provides the ability to do it manually. This advanced tuning is useful
in more than one situation, for example:
 - In a heterogeneous-disk volume, it provides the ability to manually
   choose the low-latency disks for reading.
 - It is useful for more accurate testing.
 - It avoids reading chunks from a known problematic device until that
   device is replaced (by marking the other good devices as read-preferred).


Note:

If the read-policy type is set to 'device', but there isn't any device
which is flagged as read-preferred, then stripe 0 is used for reading.

Device replacement won't migrate the read-preferred flag to the new
replace target device.

As of now, this is an in-memory only feature.

It's pointless to set the read-preferred flag on the missing device, as
IOs aren't submitted to the missing device.

If there is more than one read-preferred device in a chunk, the first
read-preferred stripe found is picked for the read IO as of now.

Usage example:

Consider a typical two disks raid1.

Configure devid1 for reading.

 $ echo 1 > devinfo/1/read_preferred
 $ cat devinfo/1/read_preferred
 1
 $ cat devinfo/2/read_preferred
 0

 $ pwd
 /sys/fs/btrfs/12345678-1234-1234-1234-123456789abc

 $ cat read_policy
 [pid] device
 $ echo device > ./read_policy
 $ cat read_policy
 pid [device]

Now read IOs are sent to devid 1 (sdb).

 $ echo 3 > /proc/sys/vm/drop_caches
 $ md5sum /btrfs/YkZI

 $ iostat -zy 1 | egrep 'sdb|sdc' (from another terminal)
 sdb              50.00     40048.00         0.00      40048          0

Change the read-preferred device from devid 1 to devid 2 (sdc).

 $ echo 0 > ./devinfo/1/read_preferred

[ 3343.918658] BTRFS info (device sdb): reset read preferred on devid 1 (1334)

 $ echo 1 > ./devinfo/2/read_preferred

[ 3343.919876] BTRFS info (device sdb): set read preferred on devid 2 (1334)

 $ echo 3 > /proc/sys/vm/drop_caches
 $ md5sum /btrfs/YkZI

Further read ios are sent to devid 2 (sdc).

 $ iostat -zy 1 | egrep 'sdb|sdc' (from another terminal)
 sdc              49.00     40048.00         0.00      40048          0

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/sysfs.c   |  2 +-
 fs/btrfs/volumes.c | 23 +++++++++++++++++++++++
 fs/btrfs/volumes.h |  2 ++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index ca9812cabece..7c15fa1a8b33 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1169,7 +1169,7 @@ static bool strmatch(const char *buffer, const char *string)
 	return false;
 }
 
-static const char * const btrfs_read_policy_name[] = { "pid" };
+static const char * const btrfs_read_policy_name[] = { "pid", "device" };
 
 static ssize_t btrfs_read_policy_show(struct kobject *kobj,
 				      struct kobj_attribute *a, char *buf)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 272901514b0c..cf358926b52a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5793,6 +5793,25 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	return ret;
 }
 
+static int btrfs_find_read_preferred(struct map_lookup *map, int first, int num_stripe)
+{
+	int stripe_index;
+	int last = first + num_stripe;
+
+	/*
+	 * If there are more than one read preferred devices, then just pick the
+	 * first found read preferred device as of now.
+	 */
+	for (stripe_index = first; stripe_index < last; stripe_index++) {
+		if (test_bit(BTRFS_DEV_STATE_READ_PREFERRED,
+			     &map->stripes[stripe_index].dev->dev_state))
+			return stripe_index;
+	}
+
+	/* If there is no read preferred device then just use the first stripe */
+	return first;
+}
+
 static int find_live_mirror(struct btrfs_fs_info *fs_info,
 			    struct map_lookup *map, int first,
 			    int dev_replace_is_ongoing)
@@ -5822,6 +5841,10 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	case BTRFS_READ_POLICY_PID:
 		preferred_mirror = first + (current->pid % num_stripes);
 		break;
+	case BTRFS_READ_POLICY_DEVICE:
+		preferred_mirror = btrfs_find_read_preferred(map, first,
+							     num_stripes);
+		break;
 	}
 
 	if (dev_replace_is_ongoing &&
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index f04a177136b5..e62252061606 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -260,6 +260,8 @@ enum btrfs_chunk_allocation_policy {
 enum btrfs_read_policy {
 	/* Use process PID to choose the stripe */
 	BTRFS_READ_POLICY_PID,
+	/* Use the device marked with READ_PREFERRED state */
+	BTRFS_READ_POLICY_DEVICE,
 	BTRFS_NR_READ_POLICY,
 };
 
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-07-25 15:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-07-25 15:10 [PATCH 0/2] btrfs: read_policy type device Anand Jain
2022-07-25 15:10 ` [PATCH 1/2] btrfs: introduce new device-state read_preferred Anand Jain
2022-07-25 15:10 ` [PATCH 2/2] btrfs: introduce new read_policy device Anand Jain

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.