From: David Sterba <dsterba@suse.cz>
To: Li Zhang <zhanglikernel@gmail.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH V2] Make btrfs_prepare_device parallel during mkfs.btrfs
Date: Tue, 30 Aug 2022 19:30:51 +0200 [thread overview]
Message-ID: <20220830173051.GC13489@twin.jikos.cz> (raw)
In-Reply-To: <1661697000-18809-1-git-send-email-zhanglikernel@gmail.com>
On Sun, Aug 28, 2022 at 10:30:00PM +0800, Li Zhang wrote:
> [enhancement]
> When a disk is formatted as btrfs, it calls
> btrfs_prepare_device for each device, which takes too much time.
>
> [implementation]
> Put each btrfs_prepare_device into a thread,
> wait for the first thread to complete to mkfs.btrfs,
> and wait for other threads to complete before adding
> other devices to the file system.
>
> [test]
> Using the btrfs-progs test case mkfs-tests, mkfs.btrfs works fine.
>
> But I don't have an actual zoned device,
> so I don't know how much time it saves. If you guys
> have a way to test it, please let me know.
Zoned devices can be emulated and backed by normal disk partitions using
e.g. TCMU. The memory-backed emulation using null_blk would probably be
too fast to show whether the parallelization helps.
> Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
> ---
> Issue: 496
>
> V1:
> * Put btrfs_prepare_device into threads and make them parallel
>
> V2:
> * Set the 4 variables used by btrfs_prepare_device as global variables.
> * Use pthread_mutex to ensure error messages are not messed up.
> * Correct the error message
> * Wait for all threads to exit in a loop
>
> mkfs/main.c | 132 +++++++++++++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 95 insertions(+), 37 deletions(-)
>
> diff --git a/mkfs/main.c b/mkfs/main.c
> index ce096d3..b111f12 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -31,6 +31,7 @@
> #include <uuid/uuid.h>
> #include <ctype.h>
> #include <blkid/blkid.h>
> +#include <pthread.h>
> #include "kernel-shared/ctree.h"
> #include "kernel-shared/disk-io.h"
> #include "kernel-shared/free-space-tree.h"
> @@ -60,6 +61,20 @@ struct mkfs_allocation {
> u64 system;
> };
>
> +static bool zero_end;
> +static bool discard;
> +static bool zoned;
> +static int oflags;
Please add a prefix to the global variables, e.g. opt_.
> +static pthread_mutex_t prepare_mutex;
> +
> +struct prepare_device_progress {
> + char *file;
> + u64 dev_block_count;
> + u64 block_count;
> + int ret;
> +};
> +
> static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
> struct mkfs_allocation *allocation)
> {
> @@ -969,6 +984,30 @@ fail:
> return ret;
> }
>
> +static void *prepare_one_dev(void *ctx)
> +{
> + struct prepare_device_progress *prepare_ctx = ctx;
> + int fd;
> +
> + fd = open(prepare_ctx->file, oflags);
> + if (fd < 0) {
> + pthread_mutex_lock(&prepare_mutex);
> + error("unable to open %s: %m", prepare_ctx->file);
> + pthread_mutex_unlock(&prepare_mutex);
> + prepare_ctx->ret = fd;
> + return NULL;
> + }
> + prepare_ctx->ret = btrfs_prepare_device(fd,
> + prepare_ctx->file, &prepare_ctx->dev_block_count,
> + prepare_ctx->block_count,
> + (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> + (zero_end ? PREP_DEVICE_ZERO_END : 0) |
> + (discard ? PREP_DEVICE_DISCARD : 0) |
> + (zoned ? PREP_DEVICE_ZONED : 0));
Please indent the continuation lines of the argument list by a few more
tabs; formatted like this, they look like several separate statements.
> + close(fd);
> + return NULL;
> +}
> +
> int BOX_MAIN(mkfs)(int argc, char **argv)
> {
> char *file;
> @@ -984,7 +1023,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> u32 nodesize = 0;
> u32 sectorsize = 0;
> u32 stripesize = 4096;
> - bool zero_end = true;
> + zero_end = true;
> int fd = -1;
> int ret = 0;
> int close_ret;
> @@ -993,11 +1032,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> bool nodesize_forced = false;
> bool data_profile_opt = false;
> bool metadata_profile_opt = false;
> - bool discard = true;
> + discard = true;
> bool ssd = false;
> - bool zoned = false;
> + zoned = false;
> bool force_overwrite = false;
> - int oflags;
> char *source_dir = NULL;
> bool source_dir_set = false;
> bool shrink_rootdir = false;
> @@ -1006,6 +1044,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> u64 shrink_size;
> int dev_cnt = 0;
> int saved_optind;
> + pthread_t *t_prepare = NULL;
> + struct prepare_device_progress *prepare_ctx = NULL;
> char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
> u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
> u64 runtime_features = BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES;
> @@ -1428,29 +1468,49 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> goto error;
> }
>
> - dev_cnt--;
> + t_prepare = calloc(dev_cnt, sizeof(*t_prepare));
> + prepare_ctx = calloc(dev_cnt, sizeof(*prepare_ctx));
> +
> + if (!t_prepare || !prepare_ctx) {
> + error("unable to alloc thread for preparing dev");
> + goto error;
> + }
>
> + pthread_mutex_init(&prepare_mutex, NULL);
> + zero_end = zero_end;
> + discard = discard;
> + zoned = zoned;
> oflags = O_RDWR;
> - if (zoned && zoned_model(file) == ZONED_HOST_MANAGED)
> - oflags |= O_DIRECT;
> + for (i = 0; i < dev_cnt; i++) {
> + if (zoned && zoned_model(argv[optind + i - 1]) ==
> + ZONED_HOST_MANAGED) {
> + oflags |= O_DIRECT;
> + break;
> + }
> + }
> + for (i = 0; i < dev_cnt; i++) {
> + prepare_ctx[i].file = argv[optind + i - 1];
> + prepare_ctx[i].block_count = block_count;
> + prepare_ctx[i].dev_block_count = block_count;
> + ret = pthread_create(&t_prepare[i], NULL,
> + prepare_one_dev, &prepare_ctx[i]);
> + if (ret) {
> + error("create thread for prepare devices failed, errno:%d", ret);
The error message could say at which device it failed.
> + goto error;
> + }
> + }
> + for (i = 0; i < dev_cnt; i++)
> + pthread_join(t_prepare[i], NULL);
> + ret = prepare_ctx[0].ret;
>
> - /*
> - * Open without O_EXCL so that the problem should not occur by the
> - * following operation in kernel:
> - * (btrfs_register_one_device() fails if O_EXCL is on)
> - */
> - fd = open(file, oflags);
> - if (fd < 0) {
> - error("unable to open %s: %m", file);
> + if (ret) {
> + error("unable prepare device:%s.\n", prepare_ctx[0].file);
error() already appends the "\n", there should be no "." at the end of
the message, and there should be a space after the ":".
> goto error;
> }
> - ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
> - (zero_end ? PREP_DEVICE_ZERO_END : 0) |
> - (discard ? PREP_DEVICE_DISCARD : 0) |
> - (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> - (zoned ? PREP_DEVICE_ZONED : 0));
> - if (ret)
> - goto error;
> +
> + dev_cnt--;
> + fd = open(file, oflags);
Where does the error handling happen for this open()?
> + dev_block_count = prepare_ctx[0].dev_block_count;
> if (block_count && block_count > dev_block_count) {
> error("%s is smaller than requested size, expected %llu, found %llu",
> file, (unsigned long long)block_count,
> @@ -1459,7 +1519,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> }
>
> /* To create the first block group and chunk 0 in make_btrfs */
> - system_group_size = zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
> + system_group_size = zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
> if (dev_block_count < system_group_size) {
> error("device is too small to make filesystem, must be at least %llu",
> (unsigned long long)system_group_size);
> @@ -1558,14 +1618,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> goto raid_groups;
>
> while (dev_cnt-- > 0) {
> + int dev_index = argc - saved_optind - dev_cnt - 1;
> file = argv[optind++];
>
> - /*
> - * open without O_EXCL so that the problem should not
> - * occur by the following processing.
> - * (btrfs_register_one_device() fails if O_EXCL is on)
> - */
> - fd = open(file, O_RDWR);
> + fd = open(file, oflags);
> if (fd < 0) {
> error("unable to open %s: %m", file);
> goto error;
> @@ -1578,13 +1634,11 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> close(fd);
> continue;
> }
> - ret = btrfs_prepare_device(fd, file, &dev_block_count,
> - block_count,
> - (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> - (zero_end ? PREP_DEVICE_ZERO_END : 0) |
> - (discard ? PREP_DEVICE_DISCARD : 0) |
> - (zoned ? PREP_DEVICE_ZONED : 0));
> - if (ret) {
> + dev_block_count = prepare_ctx[dev_index]
> + .dev_block_count;
> +
> + if (prepare_ctx[dev_index].ret) {
> + error("unable prepare device:%s.\n", prepare_ctx[dev_index].file);
> goto error;
> }
>
> @@ -1763,12 +1817,16 @@ out:
>
> btrfs_close_all_devices();
> free(label);
> -
> + free(t_prepare);
> + free(prepare_ctx);
> return !!ret;
> +
> error:
> if (fd > 0)
> close(fd);
>
> + free(t_prepare);
> + free(prepare_ctx);
> free(label);
> exit(1);
> success:
> --
> 1.8.3.1
prev parent reply other threads:[~2022-08-30 17:45 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-28 14:30 [PATCH V2] Make btrfs_prepare_device parallel during mkfs.btrfs Li Zhang
2022-08-29 21:55 ` Qu Wenruo
2022-08-30 17:30 ` David Sterba [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220830173051.GC13489@twin.jikos.cz \
--to=dsterba@suse.cz \
--cc=linux-btrfs@vger.kernel.org \
--cc=zhanglikernel@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox