[PATCH 06/12] btrfs: introduce device dynamic state transition to offline or failed

linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Anand Jain <anand.jain@oracle.com>
To: linux-btrfs@vger.kernel.org
Cc: clm@fb.com, dsterba@suse.cz
Subject: [PATCH 06/12] btrfs: introduce device dynamic state transition to offline or failed
Date: Tue, 29 Mar 2016 22:22:23 +0800	[thread overview]
Message-ID: <1459261349-32206-7-git-send-email-anand.jain@oracle.com> (raw)
In-Reply-To: <1459261349-32206-1-git-send-email-anand.jain@oracle.com>

A forced device offline/failed feature is needed for the following reasons:
1) a. so that a device failure can be reported when it happens
   b. to close the device when it goes offline so that the block layer
      can clean up
2) identify the candidate for the auto replace
3) avoid further commit errors being reported against the failing device
4) a device in a multi-device btrfs may go offline from the system
   (but as of now, in some system configs, btrfs gets unmounted in this
    context, which is not correct behavior)

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/volumes.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h |  14 ++++++
 2 files changed, 151 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dff2deaf88d3..a662701d4f22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7092,3 +7092,140 @@ out:
 	read_unlock(&map_tree->map_tree.lock);
 	return ret;
 }
+
+static void __close_device(struct work_struct *work)
+{
+	struct btrfs_device *device;
+
+	device = container_of(work, struct btrfs_device, rcu_work);
+
+	if (device->bdev)
+		blkdev_put(device->bdev, device->mode);
+
+	device->bdev = NULL;
+}
+
+static void close_device(struct rcu_head *head)
+{
+	struct btrfs_device *device;
+
+	device = container_of(head, struct btrfs_device, rcu);
+
+	INIT_WORK(&device->rcu_work, __close_device);
+	schedule_work(&device->rcu_work);
+}
+
+void btrfs_close_one_device_dont_free(struct btrfs_device *device)
+{
+	struct btrfs_fs_devices *fs_devices = device->fs_devices;
+
+	if (device->bdev)
+		fs_devices->open_devices--;
+
+	if (device->writeable &&
+	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
+		list_del_init(&device->dev_alloc_list);
+		fs_devices->rw_devices--;
+	}
+
+	device->writeable = 0;
+
+	call_rcu(&device->rcu, close_device);
+}
+
+void force_device_close(struct btrfs_device *device)
+{
+	struct btrfs_device *next_device;
+	struct btrfs_fs_devices *fs_devices;
+
+	fs_devices = device->fs_devices;
+
+	mutex_lock(&fs_devices->device_list_mutex);
+	lock_chunks(fs_devices->fs_info->fs_root);
+
+	next_device = list_entry(fs_devices->devices.next,
+					struct btrfs_device, dev_list);
+	if (device->bdev == fs_devices->fs_info->sb->s_bdev)
+		fs_devices->fs_info->sb->s_bdev = next_device->bdev;
+
+	if (device->bdev == fs_devices->latest_bdev)
+		fs_devices->latest_bdev = next_device->bdev;
+
+	btrfs_close_one_device_dont_free(device);
+
+	/*
+	 * TODO: works for now, but it's better to keep the state of
+	 * missing and offline different, and update rest of the
+	 * places where we check for only missing and not for failed
+	 * or offline as of now.
+	 */
+	device->missing = 1;
+	fs_devices->missing_devices++;
+	device->writeable = 0;
+
+	rcu_barrier();
+
+	unlock_chunks(fs_devices->fs_info->fs_root);
+	mutex_unlock(&fs_devices->device_list_mutex);
+}
+
+void btrfs_force_device_close(struct btrfs_device *dev, char *why)
+{
+	bool degrade_option;
+	int tolerated_fail;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_fs_devices *fs_devices;
+
+	fs_devices = dev->fs_devices;
+	fs_info = fs_devices->fs_info;
+	degrade_option = btrfs_test_opt(fs_info->fs_root, DEGRADED);
+
+	/* todo: support seed later */
+	if (fs_devices->seeding)
+		return;
+
+	/* this shouldn't be called if device is already missing */
+	if (dev->missing || !dev->bdev)
+		return;
+
+	if (dev->offline || dev->failed)
+		return;
+
+	/* The only RW device is requested to force close; let the FS handle it */
+	if (fs_devices->rw_devices == 1) {
+		btrfs_std_error(fs_info, -EIO,
+			"force offline last RW device");
+		return;
+	}
+
+	if (!strcmp(why, "offline"))
+		dev->offline = 1;
+	else if (!strcmp(why, "failed"))
+		dev->failed = 1;
+	else
+		return;
+
+	btrfs_sysfs_rm_device_link(fs_devices, dev);
+
+	force_device_close(dev);
+
+	tolerated_fail = btrfs_check_degradable(fs_info,
+						fs_info->sb->s_flags);
+	if (tolerated_fail > 0) {
+		btrfs_warn_in_rcu(fs_info, "device %s %s, chunks degraded",
+					rcu_str_deref(dev->name), why);
+	} else if(tolerated_fail < 0) {
+		btrfs_warn_in_rcu(fs_info,
+		"device %s %s, chunks failed",
+			rcu_str_deref(dev->name), why);
+		btrfs_std_error(fs_info, -EIO, "devices below critical level");
+	} else {
+		btrfs_warn_in_rcu(fs_info,
+			"device %s %s, No chunks are degraded",
+			rcu_str_deref(dev->name), why);
+	}
+	btrfs_info_in_rcu(fs_info,
+		"num_devices %llu rw_devices %llu degraded-option: %s",
+		fs_devices->num_devices, fs_devices->rw_devices,
+		degrade_option ? "set":"unset");
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 48ced5cc09e4..ccc716b3c419 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -72,7 +72,20 @@ struct btrfs_device {
 
 	int writeable;
 	int in_fs_metadata;
+	/* missing: device wasn't found at the time of mount */
+	/* fixme: correct usage of missing_devices and missing */
 	int missing;
+	/* failed: device confirmed to have experienced critical io failure */
+	int failed;
+	/*
+	 * offline: the system, user or block layer transport has removed or
+	 * offlined a device which was once present, without going
+	 * through unmount. Implies an interim communication breakdown
+	 * and not necessarily a candidate for the device replace. And
+	 * device might be online after user intervention or after
+	 * block transport layer error recovery.
+	 */
+	int offline;
 	int can_discard;
 	int is_tgtdev_for_dev_replace;
 
@@ -571,5 +584,6 @@ struct list_head *btrfs_get_fs_uuids(void);
 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
 int btrfs_check_degradable(struct btrfs_fs_info *fs_info, unsigned flags);
+void btrfs_force_device_close(struct btrfs_device *dev, char *why);
 
 #endif
-- 
2.7.0

next prev parent reply	other threads:[~2016-03-29 14:22 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-29 14:22 [PATCH v2 00/15] Introduce device state 'failed', Hot spare and Auto replace Anand Jain
2016-03-29 14:22 ` [PATCH 01/12] btrfs: Introduce a new function to check if all chunks a OK for degraded mount Anand Jain
2016-03-29 14:22 ` [PATCH 02/12] btrfs: Do per-chunk check for mount time check Anand Jain
2016-03-29 14:22 ` [PATCH 03/12] btrfs: Do per-chunk degraded check for remount Anand Jain
2016-03-29 14:22 ` [PATCH 04/12] btrfs: Allow barrier_all_devices to do per-chunk device check Anand Jain
2016-03-29 14:22 ` [PATCH 05/12] btrfs: Cleanup num_tolerated_disk_barrier_failures Anand Jain
2016-03-29 14:22 ` Anand Jain [this message]
2016-03-29 14:22 ` [PATCH 07/12] btrfs: introduce BTRFS_FEATURE_INCOMPAT_SPARE_DEV Anand Jain
2016-03-29 14:22 ` [PATCH 08/12] btrfs: add check not to mount a spare device Anand Jain
2016-03-29 14:22 ` [PATCH 09/12] btrfs: support btrfs dev scan for " Anand Jain
2016-03-29 14:22 ` [PATCH 10/12] btrfs: provide framework to get and put a " Anand Jain
2016-03-29 14:22 ` [PATCH 11/12] btrfs: introduce helper functions to perform hot replace Anand Jain
2016-03-29 14:45   ` kbuild test robot
2016-03-30 10:13     ` Anand Jain
2016-03-31  2:14       ` [kbuild-all] " Fengguang Wu
2016-03-29 14:22 ` [PATCH 12/12] btrfs: check device for critical errors and mark failed Anand Jain
2016-03-29 22:41   ` Yauhen Kharuzhy
2016-04-01 23:53     ` Anand Jain
2016-03-30  0:49   ` Yauhen Kharuzhy
2016-04-01 23:59     ` Anand Jain
2016-03-29 14:27 ` [PATCH 1/4] btrfs-progs: Introduce BTRFS_FEATURE_INCOMPAT_SPARE_DEV SB flags Anand Jain
2016-03-29 14:27   ` [PATCH v2 2/4] btrfs-progs: Introduce btrfs spare subcommand Anand Jain
2016-03-29 14:27   ` [PATCH 3/4] btrfs-progs: add fi show for spare Anand Jain
2016-03-29 14:27   ` [PATCH 4/4] btrfs-progs: add global spare device list to filesystem show Anand Jain
2016-03-29 17:30 ` [PATCH v2 00/15] Introduce device state 'failed', Hot spare and Auto replace Austin S. Hemmelgarn

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:dff2deaf88d dfblob:a662701d4f2 dfblob:48ced5cc09e
dfblob:ccc716b3c41 )
 OR (
bs:"[PATCH 06/12] btrfs: introduce device dynamic state transition to offline or failed" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1459261349-32206-7-git-send-email-anand.jain@oracle.com \
    --to=anand.jain@oracle.com \
    --cc=clm@fb.com \
    --cc=dsterba@suse.cz \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).