From: Logan Gunthorpe <logang@deltatee.com>
To: linux-raid@vger.kernel.org, Jes Sorensen <jes@trained-monkey.org>
Cc: Guoqing Jiang <guoqing.jiang@linux.dev>, Xiao Ni <xni@redhat.com>,
Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>,
Coly Li <colyli@suse.de>,
Chaitanya Kulkarni <chaitanyak@nvidia.com>,
Jonmichael Hands <jm@chia.net>,
Stephen Bates <sbates@raithlin.com>,
Martin Oliveira <Martin.Oliveira@eideticom.com>,
David Sloan <David.Sloan@eideticom.com>,
Logan Gunthorpe <logang@deltatee.com>
Subject: [PATCH mdadm v3 5/7] mdadm: Add --write-zeros option for Create
Date: Wed, 21 Sep 2022 14:43:54 -0600 [thread overview]
Message-ID: <20220921204356.4336-6-logang@deltatee.com> (raw)
In-Reply-To: <20220921204356.4336-1-logang@deltatee.com>
Add the --write-zeroes option for Create, which sends a write-zeroes
request to all the disks before assembling the array. After zeroing,
the disks will be in a known clean state and the initial
sync may be skipped.
Writing zeroes is best used when there is a hardware offload method
to zero the data. Even so, zeroing can take several minutes on
a large device. Because of this, all disks are zeroed in parallel, each in
its own forked process, and a message is printed to the user. The main
process will proceed only after all the zeroing processes have completed
successfully.
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
fixup! mdadm: Add --write-zeros option for Create
---
Create.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ReadMe.c | 2 ++
mdadm.c | 9 ++++++
mdadm.h | 7 +++++
4 files changed, 101 insertions(+)
diff --git a/Create.c b/Create.c
index 1e94b90b96bd..31bed3a37a90 100644
--- a/Create.c
+++ b/Create.c
@@ -26,6 +26,8 @@
#include "md_u.h"
#include "md_p.h"
#include <ctype.h>
+#include <fcntl.h>
+#include <sys/wait.h>
static int round_size_and_verify(unsigned long long *size, int chunk)
{
@@ -91,6 +93,73 @@ int default_layout(struct supertype *st, int level, int verbose)
return layout;
}
+/*
+ * Fork a child process that zeroes the data area of one member device.
+ *
+ * fd: open descriptor for the device named dv->devname
+ * s:  array shape; s->size (in KiB) is the per-device length to zero
+ * st: supertype supplying the default data offset (in sectors)
+ * dv: device entry; dv->data_offset is used instead of st->data_offset
+ *     unless it is INVALID_SECTORS
+ *
+ * In the parent, returns the child's pid, or a negative value if the
+ * device size could not be determined or fork() failed. The child never
+ * returns: it exits with status 0 when fallocate() succeeds and 1 when
+ * it fails.
+ */
+static pid_t write_zeroes_fork(int fd, struct shape *s, struct supertype *st,
+			       struct mddev_dev *dv)
+{
+	unsigned long long offset_bytes, size_bytes;
+	pid_t pid;
+
+	size_bytes = KIB_TO_BYTES(s->size);
+
+	/*
+	 * If size_bytes is zero, this is a zoned raid array where
+	 * each disk is of a different size and uses its full
+	 * disk. Thus zero the entire disk.
+	 */
+	if (!size_bytes && !get_dev_size(fd, dv->devname, &size_bytes))
+		return -1;
+
+	if (dv->data_offset != INVALID_SECTORS)
+		offset_bytes = SEC_TO_BYTES(dv->data_offset);
+	else
+		offset_bytes = SEC_TO_BYTES(st->data_offset);
+
+	/*
+	 * Both values are unsigned long long, hence %llu. Note that the
+	 * second value is the length handed to fallocate(), not an end
+	 * offset.
+	 * NOTE(review): the "from ... to ..." wording reads like a range;
+	 * confirm whether the message should be reworded.
+	 */
+	pr_info("zeroing data from %llu to %llu on: %s\n",
+		offset_bytes, size_bytes, dv->devname);
+
+	pid = fork();
+	if (pid < 0)
+		pr_err("Could not fork to zero disks: %m\n");
+
+	/* Parent (or failed fork): hand the result back to the caller. */
+	if (pid != 0)
+		return pid;
+
+	/*
+	 * Child: zero the range and report the outcome via the exit status.
+	 * NOTE(review): exit() runs atexit handlers and flushes any stdio
+	 * buffers inherited from the parent; consider _exit() -- confirm.
+	 */
+	if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
+		      offset_bytes, size_bytes)) {
+		pr_err("zeroing %s failed: %m\n", dv->devname);
+		exit(1);
+	}
+
+	exit(0);
+}
+
+/*
+ * Reap the zeroing children recorded in info[0..count-1].zero_pid.
+ *
+ * Entries whose zero_pid is not a valid child pid (zero, or negative after
+ * a failed write_zeroes_fork()) are skipped; passing a negative pid to
+ * waitpid() would wait for unrelated children. Every entry that was waited
+ * on has its zero_pid reset to 0, so calling this function a second time on
+ * the same array does not wait on an already reaped pid.
+ *
+ * Returns 0 if every child waited for exited with status 0 (or there was
+ * nothing to wait for), 1 otherwise.
+ */
+static int wait_for_zero_forks(struct mdinfo *info, int count)
+{
+	int wstatus, ret = 0, i;
+	bool waited = false;
+	pid_t reaped;
+
+	for (i = 0; i < count; i++) {
+		if (info[i].zero_pid <= 0)
+			continue;
+
+		waited = true;
+		reaped = waitpid(info[i].zero_pid, &wstatus, 0);
+
+		/* Never wait on this pid again, whatever the outcome. */
+		info[i].zero_pid = 0;
+
+		/* wstatus is only meaningful when waitpid() succeeded. */
+		if (reaped < 0 || !WIFEXITED(wstatus) || WEXITSTATUS(wstatus))
+			ret = 1;
+	}
+
+	if (ret)
+		pr_err("zeroing failed!\n");
+	else if (waited)
+		pr_info("zeroing finished\n");
+
+	return ret;
+}
+
static int add_disk_to_super(int mdfd, struct shape *s, struct context *c,
struct supertype *st, struct mddev_dev *dv,
struct mdinfo *info, int have_container, int major_num)
@@ -148,6 +217,14 @@ static int add_disk_to_super(int mdfd, struct shape *s, struct context *c,
}
st->ss->getinfo_super(st, info, NULL);
+ if (s->write_zeroes) {
+ info->zero_pid = write_zeroes_fork(fd, s, st, dv);
+ if (info->zero_pid <= 0) {
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ return 1;
+ }
+ }
+
if (have_container && c->verbose > 0)
pr_err("Using %s for device %d\n",
map_dev(info->disk.major, info->disk.minor, 0),
@@ -287,6 +364,10 @@ static int add_disks(int mdfd, struct mdinfo *info, struct shape *s,
}
if (pass == 1) {
+ ret = wait_for_zero_forks(infos, total_slots);
+ if (ret)
+ goto out;
+
ret = update_metadata(mdfd, s, st, map, info,
chosen_name);
if (ret)
@@ -295,6 +376,8 @@ static int add_disks(int mdfd, struct mdinfo *info, struct shape *s,
}
out:
+ if (ret)
+ wait_for_zero_forks(infos, total_slots);
free(infos);
return ret;
}
diff --git a/ReadMe.c b/ReadMe.c
index 7f94847e9769..50913dabca40 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -138,6 +138,7 @@ struct option long_options[] = {
{"size", 1, 0, 'z'},
{"auto", 1, 0, Auto}, /* also for --assemble */
{"assume-clean",0,0, AssumeClean },
+ {"write-zeroes",0,0, WriteZeroes },
{"metadata", 1, 0, 'e'}, /* superblock format */
{"bitmap", 1, 0, Bitmap},
{"bitmap-chunk", 1, 0, BitmapChunk},
@@ -390,6 +391,7 @@ char Help_create[] =
" --write-journal= : Specify journal device for RAID-4/5/6 array\n"
" --consistency-policy= : Specify the policy that determines how the array\n"
" -k : maintains consistency in case of unexpected shutdown.\n"
+" --write-zeroes : Write zeroes to the disks before creating. This will bypass initial sync.\n"
"\n"
;
diff --git a/mdadm.c b/mdadm.c
index 972adb524dfb..141838bd394f 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -602,6 +602,10 @@ int main(int argc, char *argv[])
s.assume_clean = 1;
continue;
+ case O(CREATE, WriteZeroes):
+ s.write_zeroes = 1;
+ continue;
+
case O(GROW,'n'):
case O(CREATE,'n'):
case O(BUILD,'n'): /* number of raid disks */
@@ -1306,6 +1310,11 @@ int main(int argc, char *argv[])
}
}
+ if (s.write_zeroes && !s.assume_clean) {
+ pr_info("Disk zeroing requested, setting --assume-clean to skip resync\n");
+ s.assume_clean = 1;
+ }
+
if (!mode && devs_found) {
mode = MISC;
devmode = 'Q';
diff --git a/mdadm.h b/mdadm.h
index 1ab31564efef..c7e00195d8c8 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -273,6 +273,9 @@ static inline void __put_unaligned32(__u32 val, void *p)
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+#define KIB_TO_BYTES(x) ((x) << 10)
+#define SEC_TO_BYTES(x) ((x) << 9)
+
extern const char Name[];
struct md_bb_entry {
@@ -387,6 +390,8 @@ struct mdinfo {
ARRAY_UNKNOWN_STATE,
} array_state;
struct md_bb bb;
+
+ pid_t zero_pid;
};
struct createinfo {
@@ -433,6 +438,7 @@ extern char Version[], Usage[], Help[], OptionHelp[],
*/
enum special_options {
AssumeClean = 300,
+ WriteZeroes,
BitmapChunk,
WriteBehind,
ReAdd,
@@ -593,6 +599,7 @@ struct shape {
int bitmap_chunk;
char *bitmap_file;
int assume_clean;
+ bool write_zeroes;
int write_behind;
unsigned long long size;
unsigned long long data_offset;
--
2.30.2
next prev parent reply other threads:[~2022-09-21 20:44 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-21 20:43 [PATCH mdadm v3 0/7] Write Zeroes option for Creating Arrays Logan Gunthorpe
2022-09-21 20:43 ` [PATCH mdadm v3 1/7] Create: goto abort_locked instead of return 1 in error path Logan Gunthorpe
2022-09-21 20:43 ` [PATCH mdadm v3 2/7] Create: remove safe_mode_delay local variable Logan Gunthorpe
2022-09-21 20:43 ` [PATCH mdadm v3 3/7] Create: Factor out add_disks() helpers Logan Gunthorpe
2022-09-21 20:43 ` [PATCH mdadm v3 4/7] mdadm: Introduce pr_info() Logan Gunthorpe
2022-09-21 20:43 ` Logan Gunthorpe [this message]
2022-09-23 11:20 ` [PATCH mdadm v3 5/7] mdadm: Add --write-zeros option for Create Mariusz Tkaczyk
2022-09-30 5:23 ` Xiao Ni
2022-09-30 15:39 ` Logan Gunthorpe
2022-09-21 20:43 ` [PATCH mdadm v3 6/7] tests/00raid5-zero: Introduce test to exercise --write-zeros Logan Gunthorpe
2022-09-21 20:43 ` [PATCH mdadm v3 7/7] manpage: Add --write-zeroes option to manpage Logan Gunthorpe
2022-09-30 5:26 ` [PATCH mdadm v3 0/7] Write Zeroes option for Creating Arrays Xiao Ni
2022-09-30 15:39 ` Logan Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220921204356.4336-6-logang@deltatee.com \
--to=logang@deltatee.com \
--cc=David.Sloan@eideticom.com \
--cc=Martin.Oliveira@eideticom.com \
--cc=chaitanyak@nvidia.com \
--cc=colyli@suse.de \
--cc=guoqing.jiang@linux.dev \
--cc=jes@trained-monkey.org \
--cc=jm@chia.net \
--cc=linux-raid@vger.kernel.org \
--cc=mariusz.tkaczyk@linux.intel.com \
--cc=sbates@raithlin.com \
--cc=xni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.