linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Nikolay Borisov <nborisov@suse.com>
To: Liu Bo <bo.li.liu@oracle.com>, linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 01/14] Btrfs: raid56: add raid56 log via add_dev v2 ioctl
Date: Wed, 2 Aug 2017 22:25:12 +0300	[thread overview]
Message-ID: <b200b004-728a-700b-738f-317d2cd7d5e3@suse.com> (raw)
In-Reply-To: <20170801161439.13426-2-bo.li.liu@oracle.com>



On  1.08.2017 19:14, Liu Bo wrote:
> This introduces add_dev_v2 ioctl to add a device as raid56 journal
> device.  With the help of a journal device, raid56 is able to get
> rid of potential write holes.
> 
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
>  fs/btrfs/ctree.h                |  6 ++++++
>  fs/btrfs/ioctl.c                | 48 ++++++++++++++++++++++++++++++++++++++++-
>  fs/btrfs/raid56.c               | 42 ++++++++++++++++++++++++++++++++++++
>  fs/btrfs/raid56.h               |  1 +
>  fs/btrfs/volumes.c              | 26 ++++++++++++++++------
>  fs/btrfs/volumes.h              |  3 ++-
>  include/uapi/linux/btrfs.h      |  3 +++
>  include/uapi/linux/btrfs_tree.h |  4 ++++
>  8 files changed, 125 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 643c70d..d967627 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -697,6 +697,7 @@ struct btrfs_stripe_hash_table {
>  void btrfs_init_async_reclaim_work(struct work_struct *work);
>  
>  /* fs_info */
> +struct btrfs_r5l_log;
>  struct reloc_control;
>  struct btrfs_device;
>  struct btrfs_fs_devices;
> @@ -1114,6 +1115,9 @@ struct btrfs_fs_info {
>  	u32 nodesize;
>  	u32 sectorsize;
>  	u32 stripesize;
> +
> +	/* raid56 log */
> +	struct btrfs_r5l_log *r5log;
>  };
>  
>  static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
> @@ -2932,6 +2936,8 @@ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
>  
>  static inline void free_fs_info(struct btrfs_fs_info *fs_info)
>  {
> +	if (fs_info->r5log)
> +		kfree(fs_info->r5log);
>  	kfree(fs_info->balance_ctl);
>  	kfree(fs_info->delayed_root);
>  	kfree(fs_info->extent_root);
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index e176375..3d1ef4d 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2653,6 +2653,50 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
>  	return ret;
>  }
>  
> +/* identical to btrfs_ioctl_add_dev, but this is with flags */
> +static long btrfs_ioctl_add_dev_v2(struct btrfs_fs_info *fs_info, void __user *arg)
> +{
> +	struct btrfs_ioctl_vol_args_v2 *vol_args;
> +	int ret;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
> +		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
> +
> +	mutex_lock(&fs_info->volume_mutex);
> +	vol_args = memdup_user(arg, sizeof(*vol_args));
> +	if (IS_ERR(vol_args)) {
> +		ret = PTR_ERR(vol_args);
> +		goto out;
> +	}
> +
> +	if (vol_args->flags & BTRFS_DEVICE_RAID56_LOG &&
> +	    fs_info->r5log) {
> +		ret = -EEXIST;
> +		btrfs_info(fs_info, "r5log: attempting to add another log device!");
> +		goto out_free;
> +	}
> +
> +	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> +	ret = btrfs_init_new_device(fs_info, vol_args->name, vol_args->flags);
> +	if (!ret) {
> +		if (vol_args->flags & BTRFS_DEVICE_RAID56_LOG) {
> +			ASSERT(fs_info->r5log);
> +			btrfs_info(fs_info, "disk added %s as raid56 log", vol_args->name);
> +		} else {
> +			btrfs_info(fs_info, "disk added %s", vol_args->name);
> +		}
> +	}
> +out_free:
> +	kfree(vol_args);
> +out:
> +	mutex_unlock(&fs_info->volume_mutex);
> +	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> +	return ret;
> +}
> +
>  static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
>  {
>  	struct btrfs_ioctl_vol_args *vol_args;
> @@ -2672,7 +2716,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
>  	}
>  
>  	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
> -	ret = btrfs_init_new_device(fs_info, vol_args->name);
> +	ret = btrfs_init_new_device(fs_info, vol_args->name, 0);
>  
>  	if (!ret)
>  		btrfs_info(fs_info, "disk added %s", vol_args->name);
> @@ -5539,6 +5583,8 @@ long btrfs_ioctl(struct file *file, unsigned int
>  		return btrfs_ioctl_resize(file, argp);
>  	case BTRFS_IOC_ADD_DEV:
>  		return btrfs_ioctl_add_dev(fs_info, argp);
> +	case BTRFS_IOC_ADD_DEV_V2:
> +		return btrfs_ioctl_add_dev_v2(fs_info, argp);
>  	case BTRFS_IOC_RM_DEV:
>  		return btrfs_ioctl_rm_dev(file, argp);
>  	case BTRFS_IOC_RM_DEV_V2:
> diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
> index d8ea0eb..2b91b95 100644
> --- a/fs/btrfs/raid56.c
> +++ b/fs/btrfs/raid56.c
> @@ -177,6 +177,25 @@ struct btrfs_raid_bio {
>  	unsigned long *dbitmap;
>  };
>  
> +/* raid56 log */
> +struct btrfs_r5l_log {
> +	/* protect this struct and log io */
> +	struct mutex io_mutex;
> +
> +	/* r5log device */
> +	struct btrfs_device *dev;
> +
> +	/* allocation range for log entries */
> +	u64 data_offset;
> +	u64 device_size;
> +
> +	u64 last_checkpoint;
> +	u64 last_cp_seq;
> +	u64 seq;
> +	u64 log_start;
> +	struct btrfs_r5l_io_unit *current_io;
> +};
> +
>  static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
>  static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
>  static void rmw_work(struct btrfs_work *work);
> @@ -2715,3 +2734,26 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
>  	if (!lock_stripe_add(rbio))
>  		async_missing_raid56(rbio);
>  }
> +
> +int btrfs_set_r5log(struct btrfs_fs_info *fs_info, struct btrfs_device *device)
> +{
> +	struct btrfs_r5l_log *log;
> +
> +	log = kzalloc(sizeof(*log), GFP_NOFS);
> +	if (!log)
> +		return -ENOMEM;
> +
> +	/* see find_free_dev_extent for 1M start offset */
> +	log->data_offset = 1024ull * 1024;

Please use the SZ_1M define from <linux/sizes.h>.

> +	log->device_size = btrfs_device_get_total_bytes(device) - log->data_offset;
> +	log->device_size = round_down(log->device_size, PAGE_SIZE);
> +	log->dev = device;
> +	mutex_init(&log->io_mutex);
> +
> +	cmpxchg(&fs_info->r5log, NULL, log);
> +	ASSERT(fs_info->r5log == log);
> +
> +	trace_printk("r5log: set a r5log in fs_info,  alloc_range 0x%llx 0x%llx",
> +		     log->data_offset, log->data_offset + log->device_size);
> +	return 0;
> +}
> diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
> index 4ee4fe3..0c8bf6a 100644
> --- a/fs/btrfs/raid56.h
> +++ b/fs/btrfs/raid56.h
> @@ -65,4 +65,5 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
>  
>  int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
>  void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
> +int btrfs_set_r5log(struct btrfs_fs_info *fs_info, struct btrfs_device *device);
>  #endif
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 017b67d..dafc541 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -2313,7 +2313,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
>  	return ret;
>  }
>  
> -int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
> +int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path, const u64 flags)
>  {
>  	struct btrfs_root *root = fs_info->dev_root;
>  	struct request_queue *q;
> @@ -2326,6 +2326,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>  	u64 tmp;
>  	int seeding_dev = 0;
>  	int ret = 0;
> +	bool is_r5log = (flags & BTRFS_DEVICE_RAID56_LOG);
> +
> +	if (is_r5log)
> +		ASSERT(!fs_info->fs_devices->seeding);
>  
>  	if ((sb->s_flags & MS_RDONLY) && !fs_info->fs_devices->seeding)
>  		return -EROFS;
> @@ -2382,6 +2386,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>  	q = bdev_get_queue(bdev);
>  	if (blk_queue_discard(q))
>  		device->can_discard = 1;
> +	if (is_r5log)
> +		device->type |= BTRFS_DEV_RAID56_LOG;
>  	device->writeable = 1;
>  	device->generation = trans->transid;
>  	device->io_width = fs_info->sectorsize;
> @@ -2434,11 +2440,13 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>  	/* add sysfs device entry */
>  	btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
>  
> -	/*
> -	 * we've got more storage, clear any full flags on the space
> -	 * infos
> -	 */
> -	btrfs_clear_space_info_full(fs_info);
> +	if (!is_r5log) {
> +		/*
> +		 * we've got more storage, clear any full flags on the space
> +		 * infos
> +		 */
> +		btrfs_clear_space_info_full(fs_info);
> +	}
>  
>  	mutex_unlock(&fs_info->chunk_mutex);
>  	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> @@ -2459,6 +2467,12 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>  		goto error_trans;
>  	}
>  
> +	if (is_r5log) {
> +		ret = btrfs_set_r5log(fs_info, device);

Nit: Setting the r5log (in the fs_info) is only one part of the overall
initialisation of the log device, so why not name this function
btrfs_r5log_init or init_r5log?

> +		if (ret)
> +			goto error_trans;
> +	}
> +
>  	if (seeding_dev) {
>  		char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
>  
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index c7d0fbc..60e347a 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -437,7 +437,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
>  struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
>  				       u8 *uuid, u8 *fsid);
>  int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
> -int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
> +int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path,
> +			  const u64 flags);
>  int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
>  				  const char *device_path,
>  				  struct btrfs_device *srcdev,
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index a456e53..be5991f 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -35,6 +35,7 @@ struct btrfs_ioctl_vol_args {
>  #define BTRFS_DEVICE_PATH_NAME_MAX 1024
>  
>  #define BTRFS_DEVICE_SPEC_BY_ID		(1ULL << 3)
> +#define BTRFS_DEVICE_RAID56_LOG		(1ULL << 4)
>  
>  #define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED		\
>  			(BTRFS_SUBVOL_CREATE_ASYNC |	\
> @@ -818,5 +819,7 @@ enum btrfs_err_code {
>  				   struct btrfs_ioctl_feature_flags[3])
>  #define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
>  				   struct btrfs_ioctl_vol_args_v2)
> +#define BTRFS_IOC_ADD_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 59, \
> +				   struct btrfs_ioctl_vol_args_v2)
>  
>  #endif /* _UAPI_LINUX_BTRFS_H */
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index 10689e1..52fed59 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -347,6 +347,10 @@ struct btrfs_key {
>  	__u64 offset;
>  } __attribute__ ((__packed__));
>  
> +/* dev_item.type */
> +/* #define BTRFS_DEV_REGULAR	0 */

Why is the BTRFS_DEV_REGULAR define commented out?

> +#define BTRFS_DEV_RAID56_LOG	(1ULL << 0)
> +
>  struct btrfs_dev_item {
>  	/* the internal btrfs device id */
>  	__le64 devid;
> 

  reply	other threads:[~2017-08-02 19:25 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-01 16:14 [PATCH 00/14 RFC] Btrfs: Add journal for raid5/6 writes Liu Bo
2017-08-01 16:14 ` [PATCH 01/14] Btrfs: raid56: add raid56 log via add_dev v2 ioctl Liu Bo
2017-08-02 19:25   ` Nikolay Borisov [this message]
2017-08-01 16:14 ` [PATCH 02/14] Btrfs: raid56: do not allocate chunk on raid56 log Liu Bo
2017-08-01 16:14 ` [PATCH 03/14] Btrfs: raid56: detect raid56 log on mount Liu Bo
2017-08-01 16:14 ` [PATCH 04/14] Btrfs: raid56: add verbose debug Liu Bo
2017-08-01 16:14 ` [PATCH 05/14] Btrfs: raid56: add stripe log for raid5/6 Liu Bo
2017-08-01 16:14 ` [PATCH 06/14] Btrfs: raid56: add reclaim support Liu Bo
2017-08-01 16:14 ` [PATCH 07/14] Btrfs: raid56: load r5log Liu Bo
2017-08-01 16:14 ` [PATCH 08/14] Btrfs: raid56: log recovery Liu Bo
2017-08-01 16:14 ` [PATCH 09/14] Btrfs: raid56: add readahead for recovery Liu Bo
2017-08-01 16:14 ` [PATCH 10/14] Btrfs: raid56: use the readahead helper to get page Liu Bo
2017-08-01 16:14 ` [PATCH 11/14] Btrfs: raid56: add csum support Liu Bo
2017-08-01 16:14 ` [PATCH 12/14] Btrfs: raid56: fix error handling while adding a log device Liu Bo
2017-08-01 16:14 ` [PATCH 13/14] Btrfs: raid56: initialize raid5/6 log after adding it Liu Bo
2017-08-01 16:14 ` [PATCH 14/14] Btrfs: raid56: maintain IO order on raid5/6 log Liu Bo
2017-08-01 16:14 ` [PATCH 1/2] Btrfs-progs: add option to add raid5/6 log device Liu Bo
2017-08-01 16:14 ` [PATCH 2/2] Btrfs-progs: introduce super_journal_tail to inspect-dump-super Liu Bo
2017-08-01 17:25 ` [PATCH 00/14 RFC] Btrfs: Add journal for raid5/6 writes Roman Mamedov
2017-08-01 17:03   ` Liu Bo
2017-08-01 17:39   ` Austin S. Hemmelgarn
2017-08-01 17:07     ` Liu Bo
2017-08-02 18:47     ` Chris Mason
2018-05-03 19:16       ` Goffredo Baroncelli
2017-08-01 17:28 ` Hugo Mills
2017-08-01 16:56   ` Liu Bo
2017-08-01 18:15     ` Hugo Mills
2017-08-01 17:42 ` Goffredo Baroncelli
2017-08-01 17:24   ` Liu Bo
2017-08-01 22:14     ` Goffredo Baroncelli
2017-08-02 17:57       ` Liu Bo
2017-08-02 20:41         ` Goffredo Baroncelli
2017-08-02 20:27           ` Liu Bo
2017-08-03  4:02             ` Duncan
2017-08-03  4:40               ` Goffredo Baroncelli
2017-08-23 15:28             ` Chris Murphy
2017-08-23 15:47               ` Austin S. Hemmelgarn
2017-08-25 13:53               ` Goffredo Baroncelli
2017-08-01 21:00 ` Christoph Anton Mitterer
2017-08-01 22:24   ` Goffredo Baroncelli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b200b004-728a-700b-738f-317d2cd7d5e3@suse.com \
    --to=nborisov@suse.com \
    --cc=bo.li.liu@oracle.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).