public inbox for linux-raid@vger.kernel.org
 help / color / mirror / Atom feed
From: Logan Gunthorpe <logang@deltatee.com>
To: linux-raid@vger.kernel.org, Jes Sorensen <jes@trained-monkey.org>
Cc: Guoqing Jiang <guoqing.jiang@linux.dev>, Xiao Ni <xni@redhat.com>,
	Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>,
	Coly Li <colyli@suse.de>,
	Chaitanya Kulkarni <chaitanyak@nvidia.com>,
	Jonmichael Hands <jm@chia.net>,
	Stephen Bates <sbates@raithlin.com>,
	Martin Oliveira <Martin.Oliveira@eideticom.com>,
	David Sloan <David.Sloan@eideticom.com>,
	Logan Gunthorpe <logang@deltatee.com>
Subject: [PATCH mdadm v2 1/2] mdadm: Add --discard option for Create
Date: Thu,  8 Sep 2022 17:08:46 -0600	[thread overview]
Message-ID: <20220908230847.5749-2-logang@deltatee.com> (raw)
In-Reply-To: <20220908230847.5749-1-logang@deltatee.com>

Add the --discard option for Create which will send BLKDISCARD requests to
all disks before assembling the array. This is a fast way to know the
current state of all the disks. If the discard request zeros the data
on the disks (as is common but not universal) then the array is in a
state with correct parity. Thus the initial sync may be skipped.

After issuing each discard request, check if the first page of the
block device is zero. If it is, it is safe to assume the entire
disk should be zero. If it's not report an error.

If all the discard requests are successful and there are no missing
disks thin it is safe to set assume_clean as we know the array is clean.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
---
 Create.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 ReadMe.c |  1 +
 mdadm.c  |  4 ++++
 mdadm.h  |  2 ++
 4 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/Create.c b/Create.c
index e06ec2ae96a1..52bb88bccd53 100644
--- a/Create.c
+++ b/Create.c
@@ -26,6 +26,12 @@
 #include	"md_u.h"
 #include	"md_p.h"
 #include	<ctype.h>
+#include	<sys/ioctl.h>
+
+#ifndef BLKDISCARD
+#define BLKDISCARD _IO(0x12,119)
+#endif
+#include	<fcntl.h>
 
 static int round_size_and_verify(unsigned long long *size, int chunk)
 {
@@ -91,6 +97,38 @@ int default_layout(struct supertype *st, int level, int verbose)
 	return layout;
 }
 
+static int discard_device(struct context *c, int fd, const char *devname,
+			  unsigned long long offset, unsigned long long size)
+{
+	uint64_t range[2] = {offset, size};
+	unsigned long buf[4096 / sizeof(unsigned long)];
+	unsigned long i;
+
+	if (c->verbose)
+		printf("discarding data from %lld to %lld on: %s\n",
+		       offset, size, devname);
+
+	if (ioctl(fd, BLKDISCARD, &range)) {
+		pr_err("discard failed on '%s': %m\n", devname);
+		return 1;
+	}
+
+	if (pread(fd, buf, sizeof(buf), offset) != sizeof(buf)) {
+		pr_err("failed to readback '%s' after discard: %m\n", devname);
+		return 1;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(buf); i++) {
+		if (buf[i]) {
+			pr_err("device did not read back zeros after discard on '%s': %lx\n",
+			       devname, buf[i]);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 int Create(struct supertype *st, char *mddev,
 	   char *name, int *uuid,
 	   int subdevs, struct mddev_dev *devlist,
@@ -607,7 +645,7 @@ int Create(struct supertype *st, char *mddev,
 	 * as missing, so that a reconstruct happens (faster than re-parity)
 	 * FIX: Can we do this for raid6 as well?
 	 */
-	if (st->ss->external == 0 && s->assume_clean == 0 &&
+	if (st->ss->external == 0 && s->assume_clean == 0 && s->discard == 0 &&
 	    c->force == 0 && first_missing >= s->raiddisks) {
 		switch (s->level) {
 		case 4:
@@ -624,8 +662,8 @@ int Create(struct supertype *st, char *mddev,
 	/* For raid6, if creating with 1 missing drive, make a good drive
 	 * into a spare, else the create will fail
 	 */
-	if (s->assume_clean == 0 && c->force == 0 && first_missing < s->raiddisks &&
-	    st->ss->external == 0 &&
+	if (s->assume_clean == 0 && s->discard == 0 && c->force == 0 &&
+	    first_missing < s->raiddisks && st->ss->external == 0 &&
 	    second_missing >= s->raiddisks && s->level == 6) {
 		insert_point = s->raiddisks - 1;
 		if (insert_point == first_missing)
@@ -686,7 +724,7 @@ int Create(struct supertype *st, char *mddev,
 	     (insert_point < s->raiddisks || first_missing < s->raiddisks)) ||
 	    (s->level == 6 && (insert_point < s->raiddisks ||
 			       second_missing < s->raiddisks)) ||
-	    (s->level <= 0) || s->assume_clean) {
+	    (s->level <= 0) || s->assume_clean || s->discard) {
 		info.array.state = 1; /* clean, but one+ drive will be missing*/
 		info.resync_start = MaxSector;
 	} else {
@@ -945,6 +983,15 @@ int Create(struct supertype *st, char *mddev,
 				}
 				if (fd >= 0)
 					remove_partitions(fd);
+
+				if (s->discard &&
+				    discard_device(c, fd, dv->devname,
+						   dv->data_offset << 9,
+						   s->size << 10)) {
+					ioctl(mdfd, STOP_ARRAY, NULL);
+					goto abort_locked;
+				}
+
 				if (st->ss->add_to_super(st, &inf->disk,
 							 fd, dv->devname,
 							 dv->data_offset)) {
diff --git a/ReadMe.c b/ReadMe.c
index 7f94847e9769..544a057f83a0 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -138,6 +138,7 @@ struct option long_options[] = {
     {"size",	  1, 0, 'z'},
     {"auto",	  1, 0, Auto}, /* also for --assemble */
     {"assume-clean",0,0, AssumeClean },
+    {"discard",	  0, 0, Discard },
     {"metadata",  1, 0, 'e'}, /* superblock format */
     {"bitmap",	  1, 0, Bitmap},
     {"bitmap-chunk", 1, 0, BitmapChunk},
diff --git a/mdadm.c b/mdadm.c
index 972adb524dfb..049cdce1cdd2 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -602,6 +602,10 @@ int main(int argc, char *argv[])
 			s.assume_clean = 1;
 			continue;
 
+		case O(CREATE, Discard):
+			s.discard = true;
+			continue;
+
 		case O(GROW,'n'):
 		case O(CREATE,'n'):
 		case O(BUILD,'n'): /* number of raid disks */
diff --git a/mdadm.h b/mdadm.h
index 941a5f3823a0..a1e0bc9f01ad 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -433,6 +433,7 @@ extern char Version[], Usage[], Help[], OptionHelp[],
  */
 enum special_options {
 	AssumeClean = 300,
+	Discard,
 	BitmapChunk,
 	WriteBehind,
 	ReAdd,
@@ -593,6 +594,7 @@ struct shape {
 	int	bitmap_chunk;
 	char	*bitmap_file;
 	int	assume_clean;
+	bool	discard;
 	int	write_behind;
 	unsigned long long size;
 	unsigned long long data_offset;
-- 
2.30.2


  reply	other threads:[~2022-09-08 23:08 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-08 23:08 [PATCH mdadm v2 0/2] Discard Option for Creating Arrays Logan Gunthorpe
2022-09-08 23:08 ` Logan Gunthorpe [this message]
2022-09-09  9:57   ` [PATCH mdadm v2 1/2] mdadm: Add --discard option for Create Mariusz Tkaczyk
2022-09-09 11:54     ` Roman Mamedov
     [not found]       ` <CABdXBANrJNWjq4237k9DPRoxLVmiAUoKMZxaaLUrcMHsODwvmA@mail.gmail.com>
2022-09-09 15:31         ` Roman Mamedov
2022-09-12 17:43       ` Martin K. Petersen
2022-09-09 15:47     ` Logan Gunthorpe
2022-09-13  7:35       ` Mariusz Tkaczyk
2022-09-13 15:43         ` Logan Gunthorpe
2022-09-14 12:01           ` Mariusz Tkaczyk
2022-09-14 16:29             ` Logan Gunthorpe
2022-09-14 17:39               ` Mariusz Tkaczyk
2022-09-19  8:41   ` Xiao Ni
2022-09-21 18:45     ` Logan Gunthorpe
2022-09-08 23:08 ` [PATCH mdadm v2 2/2] manpage: Add --discard option to manpage Logan Gunthorpe
2022-09-12 17:40 ` [PATCH mdadm v2 0/2] Discard Option for Creating Arrays Martin K. Petersen
     [not found]   ` <CABdXBAP0LeQMmhSLUMZ_TmnSp5xmZ4xJBkNa7HUm7094m_x9xA@mail.gmail.com>
2022-09-13  3:47     ` Martin K. Petersen
2022-09-13 15:38   ` Logan Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220908230847.5749-2-logang@deltatee.com \
    --to=logang@deltatee.com \
    --cc=David.Sloan@eideticom.com \
    --cc=Martin.Oliveira@eideticom.com \
    --cc=chaitanyak@nvidia.com \
    --cc=colyli@suse.de \
    --cc=guoqing.jiang@linux.dev \
    --cc=jes@trained-monkey.org \
    --cc=jm@chia.net \
    --cc=linux-raid@vger.kernel.org \
    --cc=mariusz.tkaczyk@linux.intel.com \
    --cc=sbates@raithlin.com \
    --cc=xni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox