Linux RAID subsystem development

Linux RAID subsystem development
 help / color / mirror / Atom feed

* [PATCH 1/2] lib/raid6: Build proper files on corresponding arch
From: Matt Brown @ 2017-04-12  1:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: linux-raid, dja

Previously the raid6 test Makefile did not correctly build the files for
testing on PowerPC. This patch fixes the bug, so that all appropriate files
for PowerPC are built.

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
 lib/raid6/test/Makefile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 9c333e9..62b26d1 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
         HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
-                         gcc -c -x c - >&/dev/null && \
-                         rm ./-.o && echo yes)
+			 gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
-                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+		CFLAGS += -I../../../arch/powerpc/include
+		CFLAGS += -DCONFIG_ALTIVEC
+		OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+			vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
         endif
 endif
 ifeq ($(ARCH),tilegx)
-- 
2.9.3


^ permalink raw reply related

* [PATCH v3 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
From: Matt Brown @ 2017-04-12  1:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: linux-raid, dja
In-Reply-To: <20170412013552.21650-1-matthew.brown.dev@gmail.com>

The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

	Performance benchmarks:
		raid6: altivecx4 gen() 18773 MB/s
		raid6: altivecx8 gen() 19438 MB/s

		raid6: vpermxor4 gen() 25112 MB/s
		raid6: vpermxor8 gen() 26279 MB/s

Note: Fixed minor bug in altivec.uc regarding missing and mismatched ifdef
statements.

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
Changelog
v2
	- Change CONFIG_ALTIVEC to CPU_FTR_ALTIVEC_COMP
	- Separate bug fix into different patch
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile      |  27 ++++++++++++-
 lib/raid6/algos.c       |   4 ++
 lib/raid6/altivec.uc    |   3 ++
 lib/raid6/test/Makefile |  14 ++++++-
 lib/raid6/vpermxor.uc   | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 154 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..7775aad 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+				vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec2,
 	&raid6_altivec4,
 	&raid6_altivec8,
+	&raid6_vpermxor1,
+	&raid6_vpermxor2,
+	&raid6_vpermxor4,
+	&raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
 	&raid6_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include <linux/raid/pq.h>
 
+#ifdef CONFIG_ALTIVEC
+
 #include <altivec.h>
 #ifdef __KERNEL__
 # include <asm/cputable.h>
 # include <asm/switch_to.h>
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..9c333e9 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -97,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
 int1.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
 
@@ -122,7 +134,7 @@ tables.c: mktables
 	./mktables > tables.c
 
 clean:
-	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
 	rm -f tilegx*.c
 
 spotless: clean
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 0000000..31a324d
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor opcode to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and the vpermxor opcode.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+					    0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+					    0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+					     0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+					     0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+							void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+	unative_t wp$$, wq$$, wd$$;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	for (d = 0; d < bytes; d += NSIZE*$#) {
+		wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+		for (z = z0-1; z>=0; z--) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			/* P syndrome */
+			wp$$ = vec_xor(wp$$, wd$$);
+
+			/*Q syndrome */
+			asm("vpermxor %0,%1,%2,%3":"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+			wq$$ = vec_xor(wq$$, wd$$);
+		}
+		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
+	}
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	preempt_disable();
+	enable_kernel_altivec();
+
+	raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+	disable_kernel_altivec();
+	preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+	/* Check if CPU has both altivec and the vpermxor instruction*/
+# ifdef __KERNEL__
+	return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+		cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+	return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+	raid6_vpermxor$#_gen_syndrome,
+	NULL,
+	raid6_have_altivec_vpermxor,
+	"vpermxor$#",
+	0
+};
+#endif
-- 
2.9.3


^ permalink raw reply related

* Re: [md PATCH 00/10] Simplify bio splitting and related code.
From: Shaohua Li @ 2017-04-12  2:51 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid
In-Reply-To: <871ssywmno.fsf@notabene.neil.brown.name>

On Wed, Apr 12, 2017 at 09:27:07AM +1000, Neil Brown wrote:
> On Tue, Apr 11 2017, Shaohua Li wrote:
> 
> > On Wed, Apr 05, 2017 at 02:05:50PM +1000, Neil Brown wrote:
> >> This is part of my little project to make bio splitting
> >> in Linux uniform and dead-lock free, in a way that will mean
> >> that we can get rid of all the bioset threads.
> >> 
> >> The basic approach is that when a bio needs to be split, we call
> >> bio_split(), bio_chain() and then generic_make_request().
> >> We then proceed to handle the remainder without further splitting.
> >> Recent changes to generic_make_request() ensure that this will
> >> be safe from deadlocks, providing each bioset is used only once
> >> in the stack.
> >> 
> >> This leads to simpler code in various places.  In particular, the
> >> splitting of bios that is needed to work around known bad blocks
> >> is now much less complex.  There is only ever one r1bio per bio.
> >> 
> >> As you can see from
> >>  10 files changed, 335 insertions(+), 540 deletions(-)
> >> there is a net reduction in code.
> >
> > Looks good and makes code simpler, applied, thanks Neil! The patch 1 and 6 need
> > comments in the code to explain how deadlock is avoided though. Care to send a
> > new patch?
> 
> It isn't clear to me what sort of comment you want, or where it should
> go.
> It might make sense to have a comment near bio_split() explaining how to
> use it (i.e. explaining the pattern used in various patches here), but
> I don't see what sort of comments would help in raid1.c or raid10.c
> ??

Previously, both raid1.c and raid10.c had comments explaining why we needed to
offload the bio to raid1d/raid10d to avoid deadlock, and comments explaining why
we did bio_split() and then generic_make_request(). That information is now
lost, so I hope we can add back an explanation of why the new way (bio_split()
followed by generic_make_request() of the next part) avoids deadlock. That will
be very helpful for others.

Thanks,
Shaohua

^ permalink raw reply

* [md PATCH 0/2] Make it possible to disable create_on_open semantics.
From: NeilBrown @ 2017-04-12  6:26 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, Coly Li

Currently, opening an md /dev node will create the array object.
This makes it hard to destroy the object as udev will typically
re-open the device node when handling REMOVE events.

The "new_array" module parameter was created to work towards avoiding
this problem, and it can be used when
  CREATE names=yes

is given in /etc/mdadm.conf.
However, this doesn't currently support names like "md%d", which lots of
people use and expect, so we need more work before we can transition
away from create_on_open.

These patches add support to "new_array" so that md%d devices
can be created.  This will make it, once again, possible to have
md%d devices with numbers > 511. (3.17 made this impossible).

An enhancement to mdadm that uses this will cause new_array to always
be used (where available), and we can then disable create_on_open
completely (after suitable transition periods).

NeilBrown

---

NeilBrown (2):
      md: allow creation of mdNNN arrays via md_mod/parameters/new_array
      md: support disabling of create-on-open semantics.

 drivers/md/md.c |   48 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 7 deletions(-)

--
Signature

^ permalink raw reply

* [md PATCH 1/2] md: allow creation of mdNNN arrays via md_mod/parameters/new_array
From: NeilBrown @ 2017-04-12  6:26 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, Coly Li
In-Reply-To: <149197804398.19936.12809382889200123725.stgit@noble>

The intention when creating the "new_array" parameter and the
possibility of having array names like "md_HOME" was to transition
away from the old way of creating arrays and to eventually only use
this new way.

The "old" way of creating an array is to create a device node in /dev
and then open it.  The act of opening creates the array.
This is problematic because sometimes the device node can be opened
when we don't want to create an array.  This can easily happen
when some rule triggered by udev looks at a device as it is being
destroyed.  The node in /dev continues to exist for a short period
after an array is stopped, and opening it during this time recreates
the array (as an inactive array).

Unfortunately no clear plan for the transition was created.  It is now
time to fix that.

This patch allows devices with numeric names, like "md999" to be
created by writing to "new_array".  This will only work if the minor
number given is not already in use.  This will allow mdadm to
support the creation of arrays with numbers > 511 (currently not
possible) by writing to new_array.
mdadm can, at some point, use this approach to create *all* arrays,
which will allow the transition to only using the new-way.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/md/md.c |   34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9fe930109012..c3d3bae947a1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5164,6 +5164,14 @@ static void no_op(struct percpu_ref *r) {}
 
 static int md_alloc(dev_t dev, char *name)
 {
+	/* If dev is zero, name is the name of a device to allocate with
+	 * an arbitrary minor number.  It will be "md_???"
+	 * If dev is non-zero it must be a device number with a MAJOR of
+	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
+	 * the device is being created by opening a node in /dev.
+	 * If "name" is not NULL, the device is being created by
+	 * writing to /sys/module/md_mod/parameters/new_array.
+	 */
 	static DEFINE_MUTEX(disks_mutex);
 	struct mddev *mddev = mddev_find(dev);
 	struct gendisk *disk;
@@ -5189,7 +5197,7 @@ static int md_alloc(dev_t dev, char *name)
 	if (mddev->gendisk)
 		goto abort;
 
-	if (name) {
+	if (name && !dev) {
 		/* Need to ensure that 'name' is not a duplicate.
 		 */
 		struct mddev *mddev2;
@@ -5203,6 +5211,11 @@ static int md_alloc(dev_t dev, char *name)
 			}
 		spin_unlock(&all_mddevs_lock);
 	}
+	if (name && dev)
+		/*
+		 * Creating /dev/mdNNN via "newarray", so adjust hold_active.
+		 */
+		mddev->hold_active = UNTIL_STOP;
 
 	error = -ENOMEM;
 	mddev->queue = blk_alloc_queue(GFP_KERNEL);
@@ -5279,21 +5292,30 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 
 static int add_named_array(const char *val, struct kernel_param *kp)
 {
-	/* val must be "md_*" where * is not all digits.
-	 * We allocate an array with a large free minor number, and
+	/* val must be "md_*" or "mdNNN".
+	 * For "md_*" we allocate an array with a large free minor number, and
 	 * set the name to val.  val must not already be an active name.
+	 * For "mdNNN" we allocate an array with the minor number NNN
+	 * which must not already be in use.
 	 */
 	int len = strlen(val);
 	char buf[DISK_NAME_LEN];
+	unsigned long devnum;
 
 	while (len && val[len-1] == '\n')
 		len--;
 	if (len >= DISK_NAME_LEN)
 		return -E2BIG;
 	strlcpy(buf, val, len+1);
-	if (strncmp(buf, "md_", 3) != 0)
-		return -EINVAL;
-	return md_alloc(0, buf);
+	if (strncmp(buf, "md_", 3) == 0)
+		return md_alloc(0, buf);
+	if (strncmp(buf, "md", 2) == 0 &&
+	    isdigit(buf[2]) &&
+	    kstrtoul(buf+2, 10, &devnum) == 0 &&
+	    devnum <= MINORMASK)
+		return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
+
+	return -EINVAL;
 }
 
 static void md_safemode_timeout(unsigned long data)



^ permalink raw reply related

* [md PATCH 2/2] md: support disabling of create-on-open semantics.
From: NeilBrown @ 2017-04-12  6:26 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-raid, Coly Li
In-Reply-To: <149197804398.19936.12809382889200123725.stgit@noble>

md allows a new array device to be created by simply
opening a device file.  This makes it difficult to
remove the device, as udev is likely to open the device file
as part of processing the REMOVE event.

There is an alternate mechanism for creating arrays
by writing to the new_array module parameter.
When using tools that work with this parameter, it is
best to disable the old semantics.
This new module parameter allows that.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/md/md.c |   14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index c3d3bae947a1..a7ab769eacc3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -174,6 +174,16 @@ static const struct block_device_operations md_fops;

 static int start_readonly;

+/*
+ * The original mechanism for creating an md device is to create
+ * a device node in /dev and to open it.  This causes races with device-close.
+ * The preferred method is to write to the "new_array" module parameter.
+ * This can avoid races.
+ * Setting create_on_open to false disables the original mechanism
+ * so all the races disappear.
+ */
+static bool create_on_open = true;
+
 /* bio_clone_mddev
  * like bio_clone_bioset, but with a local bio set
  */
@@ -5286,7 +5296,8 @@ static int md_alloc(dev_t dev, char *name)

 static struct kobject *md_probe(dev_t dev, int *part, void *data)
 {
-	md_alloc(dev, NULL);
+	if (create_on_open)
+		md_alloc(dev, NULL);
 	return NULL;
 }

@@ -9202,6 +9213,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
 module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
 module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
 module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
+module_param(create_on_open, bool, S_IRUSR|S_IWUSR);

 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("MD RAID framework");

^ permalink raw reply related

* Re: [PATCH 1/2] lib/raid6: Build proper files on corresponding arch
From: Michael Ellerman @ 2017-04-12  7:01 UTC (permalink / raw)
  To: Matt Brown, linuxppc-dev; +Cc: linux-raid, dja
In-Reply-To: <20170412013552.21650-1-matthew.brown.dev@gmail.com>

Matt Brown <matthew.brown.dev@gmail.com> writes:

> diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
> index 9c333e9..62b26d1 100644
> --- a/lib/raid6/test/Makefile
> +++ b/lib/raid6/test/Makefile
> @@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
>          CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
>  else
>          HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
> -                         gcc -c -x c - >&/dev/null && \
> -                         rm ./-.o && echo yes)
> +			 gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
>          ifeq ($(HAS_ALTIVEC),yes)
> -                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
> +		CFLAGS += -I../../../arch/powerpc/include
> +		CFLAGS += -DCONFIG_ALTIVEC
> +		OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
> +			vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o

The whitespace in here is a bit of a mess, but you should follow what's
there and use spaces to indent your additions.

cheers

^ permalink raw reply

* [PATCH v1 1/2] mdadm/manpage:update manpage for readonly parameter
From: Zhilong Liu @ 2017-04-12  8:36 UTC (permalink / raw)
  To: Jes.Sorensen; +Cc: linux-raid, Zhilong Liu
In-Reply-To: <6afe1397-3063-c5d7-58ba-f3dbdfa05336@gmail.com>

update readonly in manpage:
Currently both readwrite and readonly work well,
update the readonly section.
One commit in linux/driver/md. Cleared "MD_CLOSING bit" to
Fixes: af8d8e6f0315 ("md: changes for MD_STILL_CLOSED flag")

Signed-off-by: Zhilong Liu <zlliu@suse.com>
---
 mdadm.8.in | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mdadm.8.in b/mdadm.8.in
index 744c12b..f10a8b8 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -925,7 +925,8 @@ will not try to be so clever.
 Start the array
 .B read only
 rather than read-write as normal.  No writes will be allowed to the
-array, and no resync, recovery, or reshape will be started.
+array, and no resync, recovery, or reshape will be started. It works with
+Create, Assemble, Manage and Misc mode.
 
 .TP
 .BR \-a ", " "\-\-auto{=yes,md,mdp,part,p}{NN}"
@@ -2232,7 +2233,7 @@ be in use.
 
 .TP
 .B \-\-readonly
-start the array readonly \(em not supported yet.
+start the array in readonly mode.
 
 .SH MANAGE MODE
 .HP 12
-- 
2.6.6


^ permalink raw reply related

* [PATCH v1 2/2] mdadm/manpage:clustered arrays don't support array-size yet
From: Zhilong Liu @ 2017-04-12  8:37 UTC (permalink / raw)
  To: Jes.Sorensen; +Cc: linux-raid, Zhilong Liu
In-Reply-To: <1d2103a9-a7f2-54f8-ec51-ad0a04c6d9ae@gmail.com>

Update manpage for array-size section:
Clustered arrays don't support the --array-size yet.

Signed-off-by: Zhilong Liu <zlliu@suse.com>
---
 mdadm.8.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mdadm.8.in b/mdadm.8.in
index f10a8b8..fb99a5c 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -541,6 +541,8 @@ A value of
 restores the apparent size of the array to be whatever the real
 amount of available space is.
 
+Clustered arrays do not support this parameter yet.
+
 .TP
 .BR \-c ", " \-\-chunk=
 Specify chunk size of kilobytes.  The default when creating an
-- 
2.6.6


^ permalink raw reply related

* Re: [PATCH 1/2] lib/raid6: Build proper files on corresponding arch
From: Daniel Axtens @ 2017-04-12  9:27 UTC (permalink / raw)
  To: Michael Ellerman, Matt Brown, linuxppc-dev; +Cc: linux-raid
In-Reply-To: <87wpaqt8ia.fsf@concordia.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:

> Matt Brown <matthew.brown.dev@gmail.com> writes:
>
>> diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
>> index 9c333e9..62b26d1 100644
>> --- a/lib/raid6/test/Makefile
>> +++ b/lib/raid6/test/Makefile
>> @@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
>>          CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
>>  else
>>          HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
>> -                         gcc -c -x c - >&/dev/null && \
>> -                         rm ./-.o && echo yes)
>> +			 gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
>>          ifeq ($(HAS_ALTIVEC),yes)
>> -                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
>> +		CFLAGS += -I../../../arch/powerpc/include
>> +		CFLAGS += -DCONFIG_ALTIVEC
>> +		OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
>> +			vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
>
> The whitespace in here is a bit of a mess, but you should follow what's
> there and use spaces to indent your additions.

My apologies for steering you in the wrong direction here Matt!

Also, should the changes to altivec.uc in patch 2 be part of this patch?
From memory they are also needed to run the tests?

Regards,
Daniel
>
> cheers

^ permalink raw reply

* Linux software raid troubles
From: linuxknight @ 2017-04-12 14:06 UTC (permalink / raw)
  To: linux-raid

Last weekend I was moving a server with a raid1 configuration,
controlled by an Intel Corporation 82801 SATA RAID Controller.  Upon
reboot I noticed the degraded message (server hadn't been rebooted in a
couple years).

The raid1 array was two 500gb black WD drives.  I wasn't able to locate
an identical 500gb disk, but did find a 2TB just to get things
mirrored again.  The bios screen accepted the replacement disk and
said it would rebuild in the OS.  mdsync seemed to do its thing but I
noticed the mdmon process was taking 200% cpu.  I let it go a few days
thinking it was just taking longer than normal to sync, then rebooted.
It was in a complete failed state and wouldn't boot at all.  After
removing the 2TB disk I was able to boot into the OS again.  I just
assumed I needed a similar drive size for the second part of the
mirror.

Today I installed an identical black WD 500gb drive and it's showing the
same behavior.  Currently running a bad block check but in the
meantime I found the wiki and read up a bit on some basic
troubleshooting and asking for help
(https://raid.wiki.kernel.org/index.php/Asking_for_help)

I wanted to attach the output of the commands on that page and hope
someone may have some ideas for rebuilding this second drive.  Thank
you in advance for any suggestions.  I'm concerned at this point I only
have one good drive and could possibly lose everything if that failed.

mail:~ # smartctl --xall /dev/sda
smartctl 6.0 2012-10-10 r3643 [i686-linux-3.1.10-1.29-pae] (SUSE RPM)
Copyright (C) 2002-12, Bruce Allen, Christian Franke, www.smartmontools.org

=== START OF INFORMATION SECTION ===
Model Family:     Western Digital Caviar Black
Device Model:     WDC WD5002AALX-00J37A0
Serial Number:    WD-WMAYUL169523
LU WWN Device Id: 5 0014ee 104a23be3
Firmware Version: 15.01H15
User Capacity:    500,107,862,016 bytes [500 GB]
Sector Size:      512 bytes logical/physical
Device is:        In smartctl database [for details use: -P show]
ATA Version is:   ATA8-ACS (minor revision not indicated)
SATA Version is:  SATA 3.0, 6.0 Gb/s (current: 3.0 Gb/s)
Local Time is:    Wed Apr 12 09:33:54 2017 EDT
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
AAM feature is:   Unavailable
APM feature is:   Unavailable
Rd look-ahead is: Enabled
Write cache is:   Enabled
ATA Security is:  Disabled, frozen [SEC2]

=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED

General SMART Values:
Offline data collection status:  (0x82) Offline data collection activity
                                        was completed without error.
                                        Auto Offline Data Collection: Enabled.
Self-test execution status:      (   0) The previous self-test routine completed
                                        without error or no self-test has ever
                                        been run.
Total time to complete Offline
data collection:                ( 8280) seconds.
Offline data collection
capabilities:                    (0x7b) SMART execute Offline immediate.
                                        Auto Offline data collection on/off support.
                                        Suspend Offline collection upon new
                                        command.
                                        Offline surface scan supported.
                                        Self-test supported.
                                        Conveyance Self-test supported.
                                        Selective Self-test supported.
SMART capabilities:            (0x0003) Saves SMART data before entering
                                        power-saving mode.
                                        Supports SMART auto save timer.
Error logging capability:        (0x01) Error logging supported.
                                        General Purpose Logging supported.
Short self-test routine
recommended polling time:        (   2) minutes.
Extended self-test routine
recommended polling time:        (  84) minutes.
Conveyance self-test routine
recommended polling time:        (   5) minutes.
SCT capabilities:              (0x3037) SCT Status supported.
                                        SCT Feature Control supported.
                                        SCT Data Table supported.

SMART Attributes Data Structure revision number: 16
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
  1 Raw_Read_Error_Rate     POSR-K   200   200   051    -    273
  3 Spin_Up_Time            POS--K   144   144   021    -    3783
  4 Start_Stop_Count        -O--CK   100   100   000    -    42
  5 Reallocated_Sector_Ct   PO--CK   200   200   140    -    0
  7 Seek_Error_Rate         -OSR-K   200   200   000    -    0
  9 Power_On_Hours          -O--CK   046   046   000    -    39646
 10 Spin_Retry_Count        -O--CK   100   253   000    -    0
 11 Calibration_Retry_Count -O--CK   100   253   000    -    0
 12 Power_Cycle_Count       -O--CK   100   100   000    -    39
192 Power-Off_Retract_Count -O--CK   200   200   000    -    36
193 Load_Cycle_Count        -O--CK   200   200   000    -    5
194 Temperature_Celsius     -O---K   104   104   000    -    39
196 Reallocated_Event_Count -O--CK   200   200   000    -    0
197 Current_Pending_Sector  -O--CK   200   200   000    -    9
198 Offline_Uncorrectable   ----CK   200   200   000    -    7
199 UDMA_CRC_Error_Count    -O--CK   200   200   000    -    0
200 Multi_Zone_Error_Rate   ---R--   200   200   000    -    15
                            ||||||_ K auto-keep
                            |||||__ C event count
                            ||||___ R error rate
                            |||____ S speed/performance
                            ||_____ O updated online
                            |______ P prefailure warning

General Purpose Log Directory Version 1
SMART           Log Directory Version 1 [multi-sector log support]
GP/S  Log at address 0x00 has    1 sectors [Log Directory]
SMART Log at address 0x01 has    1 sectors [Summary SMART error log]
SMART Log at address 0x02 has    5 sectors [Comprehensive SMART error log]
GP    Log at address 0x03 has    6 sectors [Ext. Comprehensive SMART error log]
SMART Log at address 0x06 has    1 sectors [SMART self-test log]
GP    Log at address 0x07 has    1 sectors [Extended self-test log]
SMART Log at address 0x09 has    1 sectors [Selective self-test log]
GP    Log at address 0x10 has    1 sectors [NCQ Command Error log]
GP    Log at address 0x11 has    1 sectors [SATA Phy Event Counters]
GP/S  Log at address 0x80 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x81 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x82 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x83 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x84 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x85 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x86 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x87 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x88 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x89 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8a has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8b has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8c has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8d has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8e has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8f has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x90 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x91 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x92 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x93 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x94 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x95 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x96 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x97 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x98 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x99 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9a has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9b has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9c has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9d has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9e has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9f has   16 sectors [Host vendor specific log]
GP/S  Log at address 0xa0 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa1 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa2 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa3 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa4 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa5 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa6 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa7 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa8 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xa9 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xaa has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xab has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xac has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xad has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xae has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xaf has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb0 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb1 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb2 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb3 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb4 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb5 has    1 sectors [Device vendor specific log]
GP    Log at address 0xb6 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb7 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xbd has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xc0 has    1 sectors [Device vendor specific log]
GP    Log at address 0xc1 has   24 sectors [Device vendor specific log]
GP/S  Log at address 0xe0 has    1 sectors [SCT Command/Status]
GP/S  Log at address 0xe1 has    1 sectors [SCT Data Transfer]

SMART Extended Comprehensive Error Log Version: 1 (6 sectors)
Device Error Count: 209 (device log contains only the most recent 24 errors)
        CR     = Command Register
        FEATR  = Features Register
        COUNT  = Count (was: Sector Count) Register
        LBA_48 = Upper bytes of LBA High/Mid/Low Registers ]  ATA-8
        LH     = LBA High (was: Cylinder High) Register    ]   LBA
        LM     = LBA Mid (was: Cylinder Low) Register      ] Register
        LL     = LBA Low (was: Sector Number) Register     ]
        DV     = Device (was: Device/Head) Register
        DC     = Device Control Register
        ER     = Error register
        ST     = Status register
Powered_Up_Time is measured from power on, and printed as
DDd+hh:mm:SS.sss where DD=days, hh=hours, mm=minutes,
SS=sec, and sss=millisec. It "wraps" after 49.710 days.

Error 209 [16] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:05.460  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:05.460  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:05.460  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:05.457  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:05.457  SET FEATURES [Set transfer mode]

Error 208 [15] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:03.702  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:03.702  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:03.702  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:03.701  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:03.701  SET FEATURES [Set transfer mode]

Error 207 [14] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:01.947  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:01.947  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:01.947  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:01.944  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:01.944  SET FEATURES [Set transfer mode]

Error 206 [13] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:00.189  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:00.189  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:00.189  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:00.188  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:00.188  SET FEATURES [Set transfer mode]

Error 205 [12] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:58.434  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:58.434  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:58.434  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:58.431  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:10:58.431  SET FEATURES [Set transfer mode]

Error 204 [11] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:56.681  READ FPDMA QUEUED
  ea 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:56.660  FLUSH CACHE EXT
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:56.659  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:56.659  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:56.658  IDENTIFY DEVICE

Error 203 [10] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:54.903  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:54.903  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:54.903  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:54.901  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:10:54.901  SET FEATURES [Set transfer mode]

Error 202 [9] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:53.148  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:53.147  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:53.146  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:53.145  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:10:53.145  SET FEATURES [Set transfer mode]

SMART Extended Self-test Log Version: 1 (1 sectors)
No self-tests have been logged.  [To run self-tests, use: smartctl -t]

SMART Selective self-test log data structure revision number 1
 SPAN  MIN_LBA  MAX_LBA  CURRENT_TEST_STATUS
    1        0        0  Not_testing
    2        0        0  Not_testing
    3        0        0  Not_testing
    4        0        0  Not_testing
    5        0        0  Not_testing
Selective self-test flags (0x0):
  After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.

SCT Status Version:                  3
SCT Version (vendor specific):       258 (0x0102)
SCT Support Level:                   1
Device State:                        Active (0)
Current Temperature:                    39 Celsius
Power Cycle Min/Max Temperature:     30/39 Celsius
Lifetime    Min/Max Temperature:      0/39 Celsius
Under/Over Temperature Limit Count:   0/0
SCT Temperature History Version:     2
Temperature Sampling Period:         1 minute
Temperature Logging Interval:        1 minute
Min/Max recommended Temperature:      0/60 Celsius
Min/Max Temperature Limit:           -41/85 Celsius
Temperature History Size (Index):    478 (368)

Index    Estimated Time   Temperature Celsius
 369    2017-04-12 01:36    29  **********
 ...    ..(198 skipped).    ..  **********
  90    2017-04-12 04:55    29  **********
  91    2017-04-12 04:56    28  *********
  92    2017-04-12 04:57    29  **********
 ...    ..( 71 skipped).    ..  **********
 164    2017-04-12 06:09    29  **********
 165    2017-04-12 06:10     ?  -
 166    2017-04-12 06:11    30  ***********
 167    2017-04-12 06:12    30  ***********
 168    2017-04-12 06:13    30  ***********
 169    2017-04-12 06:14    31  ************
 170    2017-04-12 06:15    32  *************
 ...    ..(  3 skipped).    ..  *************
 174    2017-04-12 06:19    32  *************
 175    2017-04-12 06:20    33  **************
 176    2017-04-12 06:21    33  **************
 177    2017-04-12 06:22    33  **************
 178    2017-04-12 06:23    34  ***************
 179    2017-04-12 06:24    34  ***************
 180    2017-04-12 06:25    35  ****************
 ...    ..(  8 skipped).    ..  ****************
 189    2017-04-12 06:34    35  ****************
 190    2017-04-12 06:35    36  *****************
 ...    ..( 23 skipped).    ..  *****************
 214    2017-04-12 06:59    36  *****************
 215    2017-04-12 07:00    37  ******************
 ...    ..(  4 skipped).    ..  ******************
 220    2017-04-12 07:05    37  ******************
 221    2017-04-12 07:06    38  *******************
 222    2017-04-12 07:07    37  ******************
 223    2017-04-12 07:08    38  *******************
 ...    ..(  6 skipped).    ..  *******************
 230    2017-04-12 07:15    38  *******************
 231    2017-04-12 07:16    37  ******************
 232    2017-04-12 07:17    38  *******************
 ...    ..( 14 skipped).    ..  *******************
 247    2017-04-12 07:32    38  *******************
 248    2017-04-12 07:33    39  ********************
 249    2017-04-12 07:34    39  ********************
 250    2017-04-12 07:35    38  *******************
 251    2017-04-12 07:36    39  ********************
 ...    ..(  4 skipped).    ..  ********************
 256    2017-04-12 07:41    39  ********************
 257    2017-04-12 07:42    29  **********
 ...    ..(110 skipped).    ..  **********
 368    2017-04-12 09:33    29  **********

SCT Error Recovery Control command not supported

Device Statistics (GP Log 0x04) not supported

SATA Phy Event Counters (GP Log 0x11)
ID      Size     Value  Description
0x0001  2            0  Command failed due to ICRC error
0x0002  2            0  R_ERR response for data FIS
0x0003  2            0  R_ERR response for device-to-host data FIS
0x0004  2            0  R_ERR response for host-to-device data FIS
0x0005  2            0  R_ERR response for non-data FIS
0x0006  2            0  R_ERR response for device-to-host non-data FIS
0x0007  2            0  R_ERR response for host-to-device non-data FIS
0x000a  2            7  Device-to-host register FISes sent due to a COMRESET
0x000b  2            0  CRC errors within host-to-device FIS
0x8000  4         5831  Vendor specific

mail:~ # mdadm --examine /dev/sda
/dev/sda:
          Magic : Intel Raid ISM Cfg Sig.
        Version : 1.1.00
    Orig Family : 80d98105
         Family : 68a98654
     Generation : 00b83763
     Attributes : All supported
           UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
       Checksum : 7e0e85bb correct
    MPB Sectors : 2
          Disks : 3
   RAID Devices : 1

[Volume0]:
           UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
     RAID Level : 1
        Members : 2
          Slots : [__]
    Failed disk : 1
      This Slot : ?
     Array Size : 976766976 (465.76 GiB 500.10 GB)
   Per Dev Size : 976767240 (465.76 GiB 500.10 GB)
  Sector Offset : 0
    Num Stripes : 3815496
     Chunk Size : 64 KiB
       Reserved : 0
  Migrate State : idle
      Map State : failed
    Dirty State : dirty

  Disk00 Serial : WD-WMAYUL169523
          State : active failed
             Id : 00040000
    Usable Size : 976766862 (465.76 GiB 500.10 GB)

  Disk01 Serial : WD-WCC6Y1VENZK4
          State : active failed
             Id : 00050000
    Usable Size : 976766862 (465.76 GiB 500.10 GB)

  Disk02 Serial : Z4Z6V3CV:0
          State : active failed
             Id : ffffffff
    Usable Size : 3907022862 (1863.01 GiB 2000.40 GB)

    Disk Serial : WD-WMAYUL169523
          State : active failed
             Id : 00040000
    Usable Size : 976766862 (465.76 GiB 500.10 GB)
mail:~ # mdadm --detail /dev/sda
mdadm: /dev/sda does not appear to be an md device
mail:~ # mdadm --detail /dev/md126
md126    md126p1  md126p2
mail:~ # mdadm --detail /dev/md126
md126    md126p1  md126p2
mail:~ # mdadm --detail /dev/md126
/dev/md126:
      Container : /dev/md127, member 0
     Raid Level : raid1
     Array Size : 488383488 (465.76 GiB 500.10 GB)
  Used Dev Size : 488383620 (465.76 GiB 500.10 GB)
   Raid Devices : 2
  Total Devices : 1

          State : clean, degraded
 Active Devices : 1
Working Devices : 1
 Failed Devices : 0
  Spare Devices : 0


           UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
    Number   Major   Minor   RaidDevice State
       1       8        0        0      active sync   /dev/sda
       1       0        0        1      removed
mail:~ # mdadm --detail /dev/md127
/dev/md127:
        Version : imsm
     Raid Level : container
  Total Devices : 2

Working Devices : 2


           UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
  Member Arrays : /dev/md126

    Number   Major   Minor   RaidDevice

       0       8       16        -        /dev/sdb
       1       8        0        -        /dev/sda
mail:~/lsdrv # ./lsdrv
PCI [ahci] 00:1f.2 RAID bus controller: Intel Corporation 82801 SATA RAID Controller (rev 05)
├scsi 0:0:0:0 ATAPI    iHAS424   B      {3524253_2N8147500192}
│└sr0 1.00g [11:0] Empty/Unknown
├scsi 1:x:x:x [Empty]
├scsi 2:x:x:x [Empty]
├scsi 3:x:x:x [Empty]
├scsi 4:0:0:0 ATA      WDC WD5002AALX-0 {WD-WMAYUL169523}
│└sda 465.76g [8:0] isw_raid_member
│ ├md126 465.76g [9:126] MD vexternal:/md127/0 raid1 (2) active DEGRADED, 64k Chunk, recover (none) none {44c0fda9:b2d38c01:e48120f6:4bed6635}
│ ││                     Partitioned (dos)
│ │├md126p1 4.01g [259:0] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ │└md126p2 461.75g [259:1] Partitioned (dos) {bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
│ │ └Mounted as /dev/md126p2 @ /
│ ├md127 0.00k [9:127] MD vexternal:imsm  () inactive, None (None) None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
│ │                    Empty/Unknown
│ ├sda1 4.01g [8:1] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ └sda2 461.75g [8:2] Partitioned (dos) {bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
└scsi 5:0:0:0 ATA      WDC WD5003AZEX-0 {WD-WCC6Y1VENZK4}
 └sdb 465.76g [8:16] isw_raid_member
  └md127 0.00k [9:127] MD vexternal:imsm  () inactive, None (None) None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
                       Empty/Unknown
PCI [sata_sil24] 04:00.0 RAID bus controller: Silicon Image, Inc. SiI 3124 PCI-X Serial ATA Controller (rev 02)
├scsi 6:x:x:x [Empty]
├scsi 7:x:x:x [Empty]
├scsi 8:x:x:x [Empty]
└scsi 9:x:x:x [Empty]
mail:~/lsdrv # cat /proc/mdstat
Personalities : [raid1] [raid0] [raid10] [raid6] [raid5] [raid4]
md126 : active raid1 sda[1]
      488383488 blocks super external:/md127/0 [2/1] [U_]

md127 : inactive sda[1](S) sdb[0](S)
      5928 blocks super external:imsm

unused devices: <none>
mail:~/lsdrv #

^ permalink raw reply

* Linux software raid troubles
From: linuxknight @ 2017-04-12 14:31 UTC (permalink / raw)
  To: linux-raid

Last weekend I was moving a server with a raid1 configuration,
controlled by an Intel Corporation 82801 SATA RAID Controller.  Upon
reboot I noticed the degraded message (the server hadn't been rebooted
in a couple of years).

The raid1 array was two 500 GB black WD drives.  I wasn't able to locate
an identical 500 GB disk, but did find a 2 TB one just to get things
mirrored again.  The BIOS screen accepted the replacement disk and
said it would rebuild in the OS.  mdsync seemed to do its thing but I
noticed the mdmon process was taking 200% CPU.  I let it go for a few
days, thinking it was just taking longer than normal to sync, then
rebooted.  It was in a completely failed state and wouldn't boot at all.
After removing the 2 TB disk I was able to boot into the OS again.  I
just assumed I needed a similar drive size for the second part of the
mirror.

Today I installed an identical black WD 500 GB drive and it's showing
the same behavior.  I'm currently running a bad block check, but in the
meantime I found the wiki and read up a bit on some basic
troubleshooting and asking for help
(https://raid.wiki.kernel.org/index.php/Asking_for_help).

I wanted to attach the output of the commands on that page in the hope
that someone may have some ideas for rebuilding this second drive.
Thank you in advance for any suggestions.  I'm concerned that at this
point I only have one good drive and could lose everything if it fails.

mail:~ # smartctl --xall /dev/sda
smartctl 6.0 2012-10-10 r3643 [i686-linux-3.1.10-1.29-pae] (SUSE RPM)
Copyright (C) 2002-12, Bruce Allen, Christian Franke, www.smartmontools.org

=== START OF INFORMATION SECTION ===
Model Family:     Western Digital Caviar Black
Device Model:     WDC WD5002AALX-00J37A0
Serial Number:    WD-WMAYUL169523
LU WWN Device Id: 5 0014ee 104a23be3
Firmware Version: 15.01H15
User Capacity:    500,107,862,016 bytes [500 GB]
Sector Size:      512 bytes logical/physical
Device is:        In smartctl database [for details use: -P show]
ATA Version is:   ATA8-ACS (minor revision not indicated)
SATA Version is:  SATA 3.0, 6.0 Gb/s (current: 3.0 Gb/s)
Local Time is:    Wed Apr 12 09:33:54 2017 EDT
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
AAM feature is:   Unavailable
APM feature is:   Unavailable
Rd look-ahead is: Enabled
Write cache is:   Enabled
ATA Security is:  Disabled, frozen [SEC2]

=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED

General SMART Values:
Offline data collection status:  (0x82) Offline data collection activity
                                        was completed without error.
                                        Auto Offline Data Collection: Enabled.
Self-test execution status:      (   0) The previous self-test routine completed
                                        without error or no self-test has ever
                                        been run.
Total time to complete Offline
data collection:                ( 8280) seconds.
Offline data collection
capabilities:                    (0x7b) SMART execute Offline immediate.
                                        Auto Offline data collection on/off support.
                                        Suspend Offline collection upon new
                                        command.
                                        Offline surface scan supported.
                                        Self-test supported.
                                        Conveyance Self-test supported.
                                        Selective Self-test supported.
SMART capabilities:            (0x0003) Saves SMART data before entering
                                        power-saving mode.
                                        Supports SMART auto save timer.
Error logging capability:        (0x01) Error logging supported.
                                        General Purpose Logging supported.
Short self-test routine
recommended polling time:        (   2) minutes.
Extended self-test routine
recommended polling time:        (  84) minutes.
Conveyance self-test routine
recommended polling time:        (   5) minutes.
SCT capabilities:              (0x3037) SCT Status supported.
                                        SCT Feature Control supported.
                                        SCT Data Table supported.

SMART Attributes Data Structure revision number: 16
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
  1 Raw_Read_Error_Rate     POSR-K   200   200   051    -    273
  3 Spin_Up_Time            POS--K   144   144   021    -    3783
  4 Start_Stop_Count        -O--CK   100   100   000    -    42
  5 Reallocated_Sector_Ct   PO--CK   200   200   140    -    0
  7 Seek_Error_Rate         -OSR-K   200   200   000    -    0
  9 Power_On_Hours          -O--CK   046   046   000    -    39646
 10 Spin_Retry_Count        -O--CK   100   253   000    -    0
 11 Calibration_Retry_Count -O--CK   100   253   000    -    0
 12 Power_Cycle_Count       -O--CK   100   100   000    -    39
192 Power-Off_Retract_Count -O--CK   200   200   000    -    36
193 Load_Cycle_Count        -O--CK   200   200   000    -    5
194 Temperature_Celsius     -O---K   104   104   000    -    39
196 Reallocated_Event_Count -O--CK   200   200   000    -    0
197 Current_Pending_Sector  -O--CK   200   200   000    -    9
198 Offline_Uncorrectable   ----CK   200   200   000    -    7
199 UDMA_CRC_Error_Count    -O--CK   200   200   000    -    0
200 Multi_Zone_Error_Rate   ---R--   200   200   000    -    15
                            ||||||_ K auto-keep
                            |||||__ C event count
                            ||||___ R error rate
                            |||____ S speed/performance
                            ||_____ O updated online
                            |______ P prefailure warning

General Purpose Log Directory Version 1
SMART           Log Directory Version 1 [multi-sector log support]
GP/S  Log at address 0x00 has    1 sectors [Log Directory]
SMART Log at address 0x01 has    1 sectors [Summary SMART error log]
SMART Log at address 0x02 has    5 sectors [Comprehensive SMART error log]
GP    Log at address 0x03 has    6 sectors [Ext. Comprehensive SMART error log]
SMART Log at address 0x06 has    1 sectors [SMART self-test log]
GP    Log at address 0x07 has    1 sectors [Extended self-test log]
SMART Log at address 0x09 has    1 sectors [Selective self-test log]
GP    Log at address 0x10 has    1 sectors [NCQ Command Error log]
GP    Log at address 0x11 has    1 sectors [SATA Phy Event Counters]
GP/S  Log at address 0x80 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x81 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x82 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x83 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x84 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x85 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x86 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x87 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x88 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x89 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8a has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8b has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8c has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8d has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8e has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x8f has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x90 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x91 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x92 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x93 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x94 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x95 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x96 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x97 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x98 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x99 has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9a has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9b has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9c has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9d has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9e has   16 sectors [Host vendor specific log]
GP/S  Log at address 0x9f has   16 sectors [Host vendor specific log]
GP/S  Log at address 0xa0 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa1 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa2 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa3 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa4 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa5 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa6 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa7 has   16 sectors [Device vendor specific log]
GP/S  Log at address 0xa8 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xa9 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xaa has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xab has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xac has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xad has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xae has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xaf has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb0 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb1 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb2 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb3 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb4 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb5 has    1 sectors [Device vendor specific log]
GP    Log at address 0xb6 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xb7 has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xbd has    1 sectors [Device vendor specific log]
GP/S  Log at address 0xc0 has    1 sectors [Device vendor specific log]
GP    Log at address 0xc1 has   24 sectors [Device vendor specific log]
GP/S  Log at address 0xe0 has    1 sectors [SCT Command/Status]
GP/S  Log at address 0xe1 has    1 sectors [SCT Data Transfer]

SMART Extended Comprehensive Error Log Version: 1 (6 sectors)
Device Error Count: 209 (device log contains only the most recent 24 errors)
        CR     = Command Register
        FEATR  = Features Register
        COUNT  = Count (was: Sector Count) Register
        LBA_48 = Upper bytes of LBA High/Mid/Low Registers ]  ATA-8
        LH     = LBA High (was: Cylinder High) Register    ]   LBA
        LM     = LBA Mid (was: Cylinder Low) Register      ] Register
        LL     = LBA Low (was: Sector Number) Register     ]
        DV     = Device (was: Device/Head) Register
        DC     = Device Control Register
        ER     = Error register
        ST     = Status register
Powered_Up_Time is measured from power on, and printed as
DDd+hh:mm:SS.sss where DD=days, hh=hours, mm=minutes,
SS=sec, and sss=millisec. It "wraps" after 49.710 days.

Error 209 [16] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:05.460  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:05.460  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:05.460  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:05.457  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:05.457  SET FEATURES [Set transfer mode]

Error 208 [15] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:03.702  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:03.702  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:03.702  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:03.701  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:03.701  SET FEATURES [Set transfer mode]

Error 207 [14] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:01.947  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:01.947  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:01.947  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:01.944  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:01.944  SET FEATURES [Set transfer mode]

Error 206 [13] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:11:00.189  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:11:00.189  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:11:00.189  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:11:00.188  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:11:00.188  SET FEATURES [Set transfer mode]

Error 205 [12] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:58.434  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:58.434  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:58.434  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:58.431  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:10:58.431  SET FEATURES [Set transfer mode]

Error 204 [11] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:56.681  READ FPDMA QUEUED
  ea 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:56.660  FLUSH CACHE EXT
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:56.659  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:56.659  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:56.658  IDENTIFY DEVICE

Error 203 [10] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:54.903  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:54.903  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:54.903  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:54.901  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:10:54.901  SET FEATURES [Set transfer mode]

Error 202 [9] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
  When the command that caused the error occurred, the device was active or idle.

  After command completion occurred, registers were:
  ER -- ST COUNT  LBA_48  LH LM LL DV DC
  -- -- -- == -- == == == -- -- -- -- --
  40 -- 51 00 00 00 00 13 34 8f 60 40 00  Error: UNC at LBA = 0x13348f60 = 322211680

  Commands leading to the command that caused the error were:
  CR FEATR COUNT  LBA_48  LH LM LL DV DC  Powered_Up_Time  Command/Feature_Name
  -- == -- == -- == == == -- -- -- -- --  ---------------  --------------------
  60 00 08 00 00 00 00 13 34 8f 60 40 08     01:10:53.148  READ FPDMA QUEUED
  ef 00 10 00 02 00 00 00 00 00 00 a0 08     01:10:53.147  SET FEATURES [Reserved for Serial ATA]
  27 00 00 00 00 00 00 00 00 00 00 e0 08     01:10:53.146  READ NATIVE MAX ADDRESS EXT
  ec 00 00 00 00 00 00 00 00 00 00 a0 08     01:10:53.145  IDENTIFY DEVICE
  ef 00 03 00 46 00 00 00 00 00 00 a0 08     01:10:53.145  SET FEATURES [Set transfer mode]

SMART Extended Self-test Log Version: 1 (1 sectors)
No self-tests have been logged.  [To run self-tests, use: smartctl -t]

SMART Selective self-test log data structure revision number 1
 SPAN  MIN_LBA  MAX_LBA  CURRENT_TEST_STATUS
    1        0        0  Not_testing
    2        0        0  Not_testing
    3        0        0  Not_testing
    4        0        0  Not_testing
    5        0        0  Not_testing
Selective self-test flags (0x0):
  After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.

SCT Status Version:                  3
SCT Version (vendor specific):       258 (0x0102)
SCT Support Level:                   1
Device State:                        Active (0)
Current Temperature:                    39 Celsius
Power Cycle Min/Max Temperature:     30/39 Celsius
Lifetime    Min/Max Temperature:      0/39 Celsius
Under/Over Temperature Limit Count:   0/0
SCT Temperature History Version:     2
Temperature Sampling Period:         1 minute
Temperature Logging Interval:        1 minute
Min/Max recommended Temperature:      0/60 Celsius
Min/Max Temperature Limit:           -41/85 Celsius
Temperature History Size (Index):    478 (368)

Index    Estimated Time   Temperature Celsius
 369    2017-04-12 01:36    29  **********
 ...    ..(198 skipped).    ..  **********
  90    2017-04-12 04:55    29  **********
  91    2017-04-12 04:56    28  *********
  92    2017-04-12 04:57    29  **********
 ...    ..( 71 skipped).    ..  **********
 164    2017-04-12 06:09    29  **********
 165    2017-04-12 06:10     ?  -
 166    2017-04-12 06:11    30  ***********
 167    2017-04-12 06:12    30  ***********
 168    2017-04-12 06:13    30  ***********
 169    2017-04-12 06:14    31  ************
 170    2017-04-12 06:15    32  *************
 ...    ..(  3 skipped).    ..  *************
 174    2017-04-12 06:19    32  *************
 175    2017-04-12 06:20    33  **************
 176    2017-04-12 06:21    33  **************
 177    2017-04-12 06:22    33  **************
 178    2017-04-12 06:23    34  ***************
 179    2017-04-12 06:24    34  ***************
 180    2017-04-12 06:25    35  ****************
 ...    ..(  8 skipped).    ..  ****************
 189    2017-04-12 06:34    35  ****************
 190    2017-04-12 06:35    36  *****************
 ...    ..( 23 skipped).    ..  *****************
 214    2017-04-12 06:59    36  *****************
 215    2017-04-12 07:00    37  ******************
 ...    ..(  4 skipped).    ..  ******************
 220    2017-04-12 07:05    37  ******************
 221    2017-04-12 07:06    38  *******************
 222    2017-04-12 07:07    37  ******************
 223    2017-04-12 07:08    38  *******************
 ...    ..(  6 skipped).    ..  *******************
 230    2017-04-12 07:15    38  *******************
 231    2017-04-12 07:16    37  ******************
 232    2017-04-12 07:17    38  *******************
 ...    ..( 14 skipped).    ..  *******************
 247    2017-04-12 07:32    38  *******************
 248    2017-04-12 07:33    39  ********************
 249    2017-04-12 07:34    39  ********************
 250    2017-04-12 07:35    38  *******************
 251    2017-04-12 07:36    39  ********************
 ...    ..(  4 skipped).    ..  ********************
 256    2017-04-12 07:41    39  ********************
 257    2017-04-12 07:42    29  **********
 ...    ..(110 skipped).    ..  **********
 368    2017-04-12 09:33    29  **********

SCT Error Recovery Control command not supported

Device Statistics (GP Log 0x04) not supported

SATA Phy Event Counters (GP Log 0x11)
ID      Size     Value  Description
0x0001  2            0  Command failed due to ICRC error
0x0002  2            0  R_ERR response for data FIS
0x0003  2            0  R_ERR response for device-to-host data FIS
0x0004  2            0  R_ERR response for host-to-device data FIS
0x0005  2            0  R_ERR response for non-data FIS
0x0006  2            0  R_ERR response for device-to-host non-data FIS
0x0007  2            0  R_ERR response for host-to-device non-data FIS
0x000a  2            7  Device-to-host register FISes sent due to a COMRESET
0x000b  2            0  CRC errors within host-to-device FIS
0x8000  4         5831  Vendor specific

mail:~ # mdadm --examine /dev/sda
/dev/sda:
          Magic : Intel Raid ISM Cfg Sig.
        Version : 1.1.00
    Orig Family : 80d98105
         Family : 68a98654
     Generation : 00b83763
     Attributes : All supported
           UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
       Checksum : 7e0e85bb correct
    MPB Sectors : 2
          Disks : 3
   RAID Devices : 1

[Volume0]:
           UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
     RAID Level : 1
        Members : 2
          Slots : [__]
    Failed disk : 1
      This Slot : ?
     Array Size : 976766976 (465.76 GiB 500.10 GB)
   Per Dev Size : 976767240 (465.76 GiB 500.10 GB)
  Sector Offset : 0
    Num Stripes : 3815496
     Chunk Size : 64 KiB
       Reserved : 0
  Migrate State : idle
      Map State : failed
    Dirty State : dirty

  Disk00 Serial : WD-WMAYUL169523
          State : active failed
             Id : 00040000
    Usable Size : 976766862 (465.76 GiB 500.10 GB)

  Disk01 Serial : WD-WCC6Y1VENZK4
          State : active failed
             Id : 00050000
    Usable Size : 976766862 (465.76 GiB 500.10 GB)

  Disk02 Serial : Z4Z6V3CV:0
          State : active failed
             Id : ffffffff
    Usable Size : 3907022862 (1863.01 GiB 2000.40 GB)

    Disk Serial : WD-WMAYUL169523
          State : active failed
             Id : 00040000
    Usable Size : 976766862 (465.76 GiB 500.10 GB)
mail:~ # mdadm --detail /dev/sda
mdadm: /dev/sda does not appear to be an md device
mail:~ # mdadm --detail /dev/md126
md126    md126p1  md126p2
mail:~ # mdadm --detail /dev/md126
md126    md126p1  md126p2
mail:~ # mdadm --detail /dev/md126
/dev/md126:
      Container : /dev/md127, member 0
     Raid Level : raid1
     Array Size : 488383488 (465.76 GiB 500.10 GB)
  Used Dev Size : 488383620 (465.76 GiB 500.10 GB)
   Raid Devices : 2
  Total Devices : 1

          State : clean, degraded
 Active Devices : 1
Working Devices : 1
 Failed Devices : 0
  Spare Devices : 0


           UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
    Number   Major   Minor   RaidDevice State
       1       8        0        0      active sync   /dev/sda
       1       0        0        1      removed
mail:~ # mdadm --detail /dev/md127
/dev/md127:
        Version : imsm
     Raid Level : container
  Total Devices : 2

Working Devices : 2


           UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
  Member Arrays : /dev/md126

    Number   Major   Minor   RaidDevice

       0       8       16        -        /dev/sdb
       1       8        0        -        /dev/sda
mail:~/lsdrv # ./lsdrv
PCI [ahci] 00:1f.2 RAID bus controller: Intel Corporation 82801 SATA RAID Controller (rev 05)
├scsi 0:0:0:0 ATAPI    iHAS424   B      {3524253_2N8147500192}
│└sr0 1.00g [11:0] Empty/Unknown
├scsi 1:x:x:x [Empty]
├scsi 2:x:x:x [Empty]
├scsi 3:x:x:x [Empty]
├scsi 4:0:0:0 ATA      WDC WD5002AALX-0 {WD-WMAYUL169523}
│└sda 465.76g [8:0] isw_raid_member
│ ├md126 465.76g [9:126] MD vexternal:/md127/0 raid1 (2) active DEGRADED, 64k Chunk, recover (none) none {44c0fda9:b2d38c01:e48120f6:4bed6635}
│ ││                     Partitioned (dos)
│ │├md126p1 4.01g [259:0] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ │└md126p2 461.75g [259:1] Partitioned (dos) {bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
│ │ └Mounted as /dev/md126p2 @ /
│ ├md127 0.00k [9:127] MD vexternal:imsm  () inactive, None (None) None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
│ │                    Empty/Unknown
│ ├sda1 4.01g [8:1] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ └sda2 461.75g [8:2] Partitioned (dos) {bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
└scsi 5:0:0:0 ATA      WDC WD5003AZEX-0 {WD-WCC6Y1VENZK4}
 └sdb 465.76g [8:16] isw_raid_member
  └md127 0.00k [9:127] MD vexternal:imsm  () inactive, None (None) None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
                       Empty/Unknown
PCI [sata_sil24] 04:00.0 RAID bus controller: Silicon Image, Inc. SiI 3124 PCI-X Serial ATA Controller (rev 02)
├scsi 6:x:x:x [Empty]
├scsi 7:x:x:x [Empty]
├scsi 8:x:x:x [Empty]
└scsi 9:x:x:x [Empty]
mail:~/lsdrv # cat /proc/mdstat
Personalities : [raid1] [raid0] [raid10] [raid6] [raid5] [raid4]
md126 : active raid1 sda[1]
      488383488 blocks super external:/md127/0 [2/1] [U_]

md127 : inactive sda[1](S) sdb[0](S)
      5928 blocks super external:imsm

unused devices: <none>

^ permalink raw reply

* Re: Linux software raid troubles
From: Reindl Harald @ 2017-04-12 14:45 UTC (permalink / raw)
  To: linuxknight, linux-raid
In-Reply-To: <CAAO=44Y=8xrnWvMp214RFq9Y-KVDQVfEYa2vL-Ahgmbvgs6Y4w@mail.gmail.com>



Am 12.04.2017 um 16:31 schrieb linuxknight:
> Last weekend I was moving a server with a raid1 configuration,
> controlled by a Intel Corporation 82801 SATA RAID Controller.  Upon
> reboot I noticed the degraded message (server hadnt been rebooted in a
> couple years).
> 
> The raid1 array was two 500gb black WD drives.  I wasnt able to locate
> an identical 500gb disk, but did find a 2TB just to get things
> mirrored again.  The bios screen accepted the replacement disk and
> said it would rebuild in the OS.  mdsync seemed to do its thing but I
> noticed mdmon process was taking 200% cpu.  I let it go a few days
> thinking it was just taking longer than normal to sync, then rebooted.
> It was in a complete failed state and wouldnt boot at all.  After
> removing the 2TB disk I was able to boot into the OS again.  I just
> assumed I needed a similar drive size for the second part of the
> mirror.

when you talk about a "SATA RAID Controller" and "The bios screen
accepted the replacement disk and said it would rebuild in the OS" this
sadly is not a "linux software raid" on its own

197 Current_Pending_Sector  -O--CK   200   200   000    -    9
198 Offline_Uncorrectable   ----CK   200   200   000    -    7

i would strongly suggest https://www.gnu.org/software/ddrescue/ and make
an image of that disk because after 39646 Power_On_Hours it's likely that
the remaining disk fails completely in a short time and you could at
least restore the disk-image with "dd" to a new disk if that happens as
well as mount it as a loop-device


^ permalink raw reply

* Re: [md PATCH 1/2] md: allow creation of mdNNN arrays via md_mod/parameters/new_array
From: Coly Li @ 2017-04-12 14:48 UTC (permalink / raw)
  To: NeilBrown, Shaohua Li; +Cc: linux-raid
In-Reply-To: <149197837299.19936.14922734851405940379.stgit@noble>

On 2017/4/12 下午2:26, NeilBrown wrote:
> The intention when creating the "new_array" parameter and the
> possibility of having array names line "md_HOME" was to transition
> away from the old way of creating arrays and to eventually only use
> this new way.
> 
> The "old" way of creating array is to create a device node in /dev
> and then open it.  The act of opening creates the array.
> This is problematic because sometimes the device node can be opened
> when we don't want to create an array.  This can easily happen
> when some rule triggered by udev looks at a device as it is being
> destroyed.  The node in /dev continues to exist for a short period
> after an array is stopped, and opening it during this time recreates
> the array (as an inactive array).
> 
> Unfortunately no clear plan for the transition was created.  It is now
> time to fix that.
> 
> This patch allows devices with numeric names, like "md999" to be
> created by writing to "new_array".  This will only work if the minor
> number given is not already in use.  This will allow mdadm to
> support the creation of arrays with numbers > 511 (currently not
> possible) by writing to new_array.
> mdadm can, at some point, use this approach to create *all* arrays,
> which will allow the transition to only using the new-way.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Acked-by: Coly Li <colyli@suse.de>


> ---
>  drivers/md/md.c |   34 ++++++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 9fe930109012..c3d3bae947a1 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -5164,6 +5164,14 @@ static void no_op(struct percpu_ref *r) {}
>  
>  static int md_alloc(dev_t dev, char *name)
>  {
> +	/* If dev is zero, name is the name of a device to allocate with
> +	 * an arbitrary minor number.  It will be "md_???"
> +	 * If dev is non-zero it must be a device number with a MAJOR of
> +	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
> +	 * the device is being created by opening a node in /dev.
> +	 * If "name" is not NULL, the device is being created by
> +	 * writing to /sys/module/md_mod/parameters/new_array.
> +	 */
>  	static DEFINE_MUTEX(disks_mutex);
>  	struct mddev *mddev = mddev_find(dev);
>  	struct gendisk *disk;
> @@ -5189,7 +5197,7 @@ static int md_alloc(dev_t dev, char *name)
>  	if (mddev->gendisk)
>  		goto abort;
>  
> -	if (name) {
> +	if (name && !dev) {
>  		/* Need to ensure that 'name' is not a duplicate.
>  		 */
>  		struct mddev *mddev2;
> @@ -5203,6 +5211,11 @@ static int md_alloc(dev_t dev, char *name)
>  			}
>  		spin_unlock(&all_mddevs_lock);
>  	}
> +	if (name && dev)
> +		/*
> +		 * Creating /dev/mdNNN via "newarray", so adjust hold_active.
> +		 */
> +		mddev->hold_active = UNTIL_STOP;
>  
>  	error = -ENOMEM;
>  	mddev->queue = blk_alloc_queue(GFP_KERNEL);
> @@ -5279,21 +5292,30 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
>  
>  static int add_named_array(const char *val, struct kernel_param *kp)
>  {
> -	/* val must be "md_*" where * is not all digits.
> -	 * We allocate an array with a large free minor number, and
> +	/* val must be "md_*" or "mdNNN".
> +	 * For "md_*" we allocate an array with a large free minor number, and
>  	 * set the name to val.  val must not already be an active name.
> +	 * For "mdNNN" we allocate an array with the minor number NNN
> +	 * which must not already be in use.
>  	 */
>  	int len = strlen(val);
>  	char buf[DISK_NAME_LEN];
> +	unsigned long devnum;
>  
>  	while (len && val[len-1] == '\n')
>  		len--;
>  	if (len >= DISK_NAME_LEN)
>  		return -E2BIG;
>  	strlcpy(buf, val, len+1);
> -	if (strncmp(buf, "md_", 3) != 0)
> -		return -EINVAL;
> -	return md_alloc(0, buf);
> +	if (strncmp(buf, "md_", 3) == 0)
> +		return md_alloc(0, buf);
> +	if (strncmp(buf, "md", 2) == 0 &&
> +	    isdigit(buf[2]) &&
> +	    kstrtoul(buf+2, 10, &devnum) == 0 &&
> +	    devnum <= MINORMASK)
> +		return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
> +
> +	return -EINVAL;
>  }
>  
>  static void md_safemode_timeout(unsigned long data)
> 
> 


^ permalink raw reply

* Re: [md PATCH 2/2] md: support disabling of create-on-open semantics.
From: Coly Li @ 2017-04-12 14:49 UTC (permalink / raw)
  To: NeilBrown, Shaohua Li; +Cc: linux-raid
In-Reply-To: <149197837322.19936.7035050500466184535.stgit@noble>

On 2017/4/12 下午2:26, NeilBrown wrote:
> md allows a new array device to be created by simply
> opening a device file.  This make it difficult to
> remove the device and udev is likely to open the device file
> as part of processing the REMOVE event.
> 
> There is an alternate mechanism for creating arrays
> by writing to the new_array module parameter.
> When using tools that work with this parameter, it is
> best to disable the old semantics.
> This new module parameter allows that.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>

Acked-by: Coly Li <colyli@suse.de>

> ---
>  drivers/md/md.c |   14 +++++++++++++-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index c3d3bae947a1..a7ab769eacc3 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -174,6 +174,16 @@ static const struct block_device_operations md_fops;
>  
>  static int start_readonly;
>  
> +/*
> + * The original mechanism for creating an md device is to create
> + * a device node in /dev and to open it.  This causes races with device-close.
> + * The preferred method is to write to the "new_array" module parameter.
> + * This can avoid races.
> + * Setting create_on_open to false disables the original mechanism
> + * so all the races disappear.
> + */
> +static bool create_on_open = true;
> +
>  /* bio_clone_mddev
>   * like bio_clone_bioset, but with a local bio set
>   */
> @@ -5286,7 +5296,8 @@ static int md_alloc(dev_t dev, char *name)
>  
>  static struct kobject *md_probe(dev_t dev, int *part, void *data)
>  {
> -	md_alloc(dev, NULL);
> +	if (create_on_open)
> +		md_alloc(dev, NULL);
>  	return NULL;
>  }
>  
> @@ -9202,6 +9213,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
>  module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
>  module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
>  module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
> +module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
>  
>  MODULE_LICENSE("GPL");
>  MODULE_DESCRIPTION("MD RAID framework");
> 
> 


^ permalink raw reply

* Re: Linux software raid troubles
From: Reindl Harald @ 2017-04-12 15:29 UTC (permalink / raw)
  To: linuxknight, linux-raid
In-Reply-To: <CAAO=44bsG1yrmYbag1eruNA_tdXxqiJptnRyPB=4TW6r5771HQ@mail.gmail.com>

please no private-only responses

Am 12.04.2017 um 16:52 schrieb linuxknight:
> Thanks you for the reply.  I was just examining the hardward in my
> server and it looks like there is an LSI card in there.  If I create a
> new Hardware raid mirror in that controller, is it possible to use the
> ddrescue to get my current OS onto that mirror and boot from it?  Im
> unfamiliar with the ddrescue but will certainly read up more.

"ddrescue" is at the end of the day the same as "dd"

it reads the whole drive block-by-block and writes it to a image file, 
later you can do "dd if=image.mig of=/dev/sdX bs=1M" and you get a 100% 
identical state of the disk

so just put out that drive, connect it to a ordinary SATA adapter, take 
the image and be happy that you have a backup, if the RAID-controller 
has stored whatever metadata on begin of the drive it's also part of the 
image

and hence leave out that controller to get a 100% block-by-block copy of 
the whole drive

> On Wed, Apr 12, 2017 at 10:45 AM, Reindl Harald <h.reindl@thelounge.net> wrote:
>>
>>
>> Am 12.04.2017 um 16:31 schrieb linuxknight:
>>>
>>> Last weekend I was moving a server with a raid1 configuration,
>>> controlled by a Intel Corporation 82801 SATA RAID Controller.  Upon
>>> reboot I noticed the degraded message (server hadnt been rebooted in a
>>> couple years).
>>>
>>> The raid1 array was two 500gb black WD drives.  I wasnt able to locate
>>> an identical 500gb disk, but did find a 2TB just to get things
>>> mirrored again.  The bios screen accepted the replacement disk and
>>> said it would rebuild in the OS.  mdsync seemed to do its thing but I
>>> noticed mdmon process was taking 200% cpu.  I let it go a few days
>>> thinking it was just taking longer than normal to sync, then rebooted.
>>> It was in a complete failed state and wouldnt boot at all.  After
>>> removing the 2TB disk I was able to boot into the OS again.  I just
>>> assumed I needed a similar drive size for the second part of the
>>> mirror.
>>
>>
>> when you talk about a "SATA RAID Controller" and "The bios screen accepted
>> the replacement disk and said it would rebuild in the OS" this sadly is not
>> a "linux software raid" at it's own
>>
>> 197 Current_Pending_Sector  -O--CK   200   200   000    -    9
>> 198 Offline_Uncorrectable   ----CK   200   200   000    -    7
>>
>> i would strongly suggest https://www.gnu.org/software/ddrescue/ and make a
>> image of that disk because after 39646 Power_On_Hours it's likely that the
>> remaining disk fails completly in a short time and you could at least
>> restore the disk-image with "dd" to a new disk if that happens as well as
>> mount it with as loop-device


^ permalink raw reply

* Re: Linux software raid troubles
From: linuxknight @ 2017-04-12 15:36 UTC (permalink / raw)
  To: Reindl Harald; +Cc: linux-raid
In-Reply-To: <cbadbdeb-7e6b-7784-e49c-dd2801903bb6@thelounge.net>

Thank you Reindl, Using your method would I be able to apply this IMG
file to a fresh raid1 mirror and still have it be bootable?

The reason I ask is I was looking at this guide,
https://www.data-medics.com/forum/how-to-clone-a-hard-drive-with-bad-sectors-using-ddrescue-t133.html
It has a method to transfer drive to drive.  I was thinking I would
create the fresh RAID mirror on the dedicated LSI card, then ddrescue
possibly bad drive to the new raid mirror.  Is this a bad idea?

On Wed, Apr 12, 2017 at 11:29 AM, Reindl Harald <h.reindl@thelounge.net> wrote:
> please no private-only respones
>
> Am 12.04.2017 um 16:52 schrieb linuxknight:
>>
>> Thanks you for the reply.  I was just examining the hardward in my
>> server and it looks like there is an LSI card in there.  If I create a
>> new Hardware raid mirror in that controller, is it possible to use the
>> ddrescue to get my current OS onto that mirror and boot from it?  Im
>> unfamiliar with the ddrescue but will certainly read up more.
>
>
> "ddrescue" is at the end of the day the same as "dd"
>
> it reads the whole drive block-by-block and writes it to a image file, later
> you can do "dd if=image.mig of=/dev/sdX bs=1M" and you get a 100% identical
> state of the disk
>
> so just put out that drive, connect it to a ordinary SATA adapter, take the
> image and be happy that you have a backup, if the RAID-controller has stored
> whatever metadata on begin of the drive it's also part of the image
>
> and hence leave out that controller to get a 100% block-by-block copy of the
> whole drive
>
>
>> On Wed, Apr 12, 2017 at 10:45 AM, Reindl Harald <h.reindl@thelounge.net>
>> wrote:
>>>
>>>
>>>
>>> Am 12.04.2017 um 16:31 schrieb linuxknight:
>>>>
>>>>
>>>> Last weekend I was moving a server with a raid1 configuration,
>>>> controlled by a Intel Corporation 82801 SATA RAID Controller.  Upon
>>>> reboot I noticed the degraded message (server hadnt been rebooted in a
>>>> couple years).
>>>>
>>>> The raid1 array was two 500gb black WD drives.  I wasnt able to locate
>>>> an identical 500gb disk, but did find a 2TB just to get things
>>>> mirrored again.  The bios screen accepted the replacement disk and
>>>> said it would rebuild in the OS.  mdsync seemed to do its thing but I
>>>> noticed mdmon process was taking 200% cpu.  I let it go a few days
>>>> thinking it was just taking longer than normal to sync, then rebooted.
>>>> It was in a complete failed state and wouldnt boot at all.  After
>>>> removing the 2TB disk I was able to boot into the OS again.  I just
>>>> assumed I needed a similar drive size for the second part of the
>>>> mirror.
>>>
>>>
>>>
>>> when you talk about a "SATA RAID Controller" and "The bios screen
>>> accepted
>>> the replacement disk and said it would rebuild in the OS" this sadly is
>>> not
>>> a "linux software raid" at it's own
>>>
>>> 197 Current_Pending_Sector  -O--CK   200   200   000    -    9
>>> 198 Offline_Uncorrectable   ----CK   200   200   000    -    7
>>>
>>> i would strongly suggest https://www.gnu.org/software/ddrescue/ and make
>>> a
>>> image of that disk because after 39646 Power_On_Hours it's likely that
>>> the
>>> remaining disk fails completly in a short time and you could at least
>>> restore the disk-image with "dd" to a new disk if that happens as well as
>>> mount it with as loop-device
>
>

^ permalink raw reply

* Re: Linux software raid troubles
From: Reindl Harald @ 2017-04-12 16:11 UTC (permalink / raw)
  To: linuxknight; +Cc: linux-raid
In-Reply-To: <CAAO=44Y9YspCqyfvDgx30mGO_VhYZpuj9D8U1O6063B8VL61vw@mail.gmail.com>



Am 12.04.2017 um 17:36 schrieb linuxknight:
> Thank you Reindl, Using your method would I be able to apply this IMG
> file to a fresh raid1 mirror and still have it be bootable?

that's the whole point - there is no difference if you have another 
physical disk or an image file as destination - thanks to Linux, everything 
is a file

whenever you play around with disks which might fail or are already 
broken take a complete image as soon as possible because before you try 
to restore something from that image you can even copy that one, try to 
mount it, play around and whenever you are unsure if you damaged its 
state just make a fresh copy from the untouched first backup

> The reason I ask is I was looking at this guide,
> https://www.data-medics.com/forum/how-to-clone-a-hard-drive-with-bad-sectors-using-ddrescue-t133.html
> It has a method to transfer drive to drive.  I was thinking I would
> create the fresh RAID mirror on the dedicated LSI card, then ddrescue
> possibly bad drive to the new raid mirror.  Is this a bad idea?
> 
> On Wed, Apr 12, 2017 at 11:29 AM, Reindl Harald <h.reindl@thelounge.net> wrote:
>> please no private-only respones
>>
>> Am 12.04.2017 um 16:52 schrieb linuxknight:
>>>
>>> Thanks you for the reply.  I was just examining the hardward in my
>>> server and it looks like there is an LSI card in there.  If I create a
>>> new Hardware raid mirror in that controller, is it possible to use the
>>> ddrescue to get my current OS onto that mirror and boot from it?  Im
>>> unfamiliar with the ddrescue but will certainly read up more.
>>
>>
>> "ddrescue" is at the end of the day the same as "dd"
>>
>> it reads the whole drive block-by-block and writes it to a image file, later
>> you can do "dd if=image.mig of=/dev/sdX bs=1M" and you get a 100% identical
>> state of the disk
>>
>> so just put out that drive, connect it to a ordinary SATA adapter, take the
>> image and be happy that you have a backup, if the RAID-controller has stored
>> whatever metadata on begin of the drive it's also part of the image
>>
>> and hence leave out that controller to get a 100% block-by-block copy of the
>> whole drive
>>
>>
>>> On Wed, Apr 12, 2017 at 10:45 AM, Reindl Harald <h.reindl@thelounge.net>
>>> wrote:
>>>>
>>>>
>>>>
>>>> Am 12.04.2017 um 16:31 schrieb linuxknight:
>>>>>
>>>>>
>>>>> Last weekend I was moving a server with a raid1 configuration,
>>>>> controlled by a Intel Corporation 82801 SATA RAID Controller.  Upon
>>>>> reboot I noticed the degraded message (server hadnt been rebooted in a
>>>>> couple years).
>>>>>
>>>>> The raid1 array was two 500gb black WD drives.  I wasnt able to locate
>>>>> an identical 500gb disk, but did find a 2TB just to get things
>>>>> mirrored again.  The bios screen accepted the replacement disk and
>>>>> said it would rebuild in the OS.  mdsync seemed to do its thing but I
>>>>> noticed mdmon process was taking 200% cpu.  I let it go a few days
>>>>> thinking it was just taking longer than normal to sync, then rebooted.
>>>>> It was in a complete failed state and wouldnt boot at all.  After
>>>>> removing the 2TB disk I was able to boot into the OS again.  I just
>>>>> assumed I needed a similar drive size for the second part of the
>>>>> mirror.
>>>>
>>>>
>>>>
>>>> when you talk about a "SATA RAID Controller" and "The bios screen
>>>> accepted
>>>> the replacement disk and said it would rebuild in the OS" this sadly is
>>>> not
>>>> a "linux software raid" at it's own
>>>>
>>>> 197 Current_Pending_Sector  -O--CK   200   200   000    -    9
>>>> 198 Offline_Uncorrectable   ----CK   200   200   000    -    7
>>>>
>>>> i would strongly suggest https://www.gnu.org/software/ddrescue/ and make
>>>> a
>>>> image of that disk because after 39646 Power_On_Hours it's likely that
>>>> the
>>>> remaining disk fails completly in a short time and you could at least
>>>> restore the disk-image with "dd" to a new disk if that happens as well as
>>>> mount it with as loop-device


^ permalink raw reply

* Re: [PATCH v1 1/2] mdadm/manpage:update manpage for readonly parameter
From: Jes Sorensen @ 2017-04-12 17:51 UTC (permalink / raw)
  To: Zhilong Liu; +Cc: linux-raid
In-Reply-To: <1491986198-16642-1-git-send-email-zlliu@suse.com>

On 04/12/2017 04:36 AM, Zhilong Liu wrote:
> update readonly in manpage:
> Currently both the readwrite and readonly are worked well,
> update the readonly section.
> One commit in linux/driver/md. Cleared "MD_CLOSING bit" to
> Fixes: af8d8e6f0315 ("md: changes for MD_STILL_CLOSED flag")
>
> Signed-off-by: Zhilong Liu <zlliu@suse.com>
> ---
>  mdadm.8.in | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)

Applied!

Thanks,
Jes

> diff --git a/mdadm.8.in b/mdadm.8.in
> index 744c12b..f10a8b8 100644
> --- a/mdadm.8.in
> +++ b/mdadm.8.in
> @@ -925,7 +925,8 @@ will not try to be so clever.
>  Start the array
>  .B read only
>  rather than read-write as normal.  No writes will be allowed to the
> -array, and no resync, recovery, or reshape will be started.
> +array, and no resync, recovery, or reshape will be started. It works with
> +Create, Assemble, Manage and Misc mode.
>
>  .TP
>  .BR \-a ", " "\-\-auto{=yes,md,mdp,part,p}{NN}"
> @@ -2232,7 +2233,7 @@ be in use.
>
>  .TP
>  .B \-\-readonly
> -start the array readonly \(em not supported yet.
> +start the array in readonly mode.
>
>  .SH MANAGE MODE
>  .HP 12
>


^ permalink raw reply

* Re: [PATCH v1 2/2] mdadm/manpage:clustered arrays don't support array-size yet
From: Jes Sorensen @ 2017-04-12 17:57 UTC (permalink / raw)
  To: Zhilong Liu; +Cc: linux-raid
In-Reply-To: <1491986247-16706-1-git-send-email-zlliu@suse.com>

On 04/12/2017 04:37 AM, Zhilong Liu wrote:
> Update manpage for array-size section:
> Clustered arrays don't support the --array-size yet.
>
> Signed-off-by: Zhilong Liu <zlliu@suse.com>
> ---
>  mdadm.8.in | 2 ++
>  1 file changed, 2 insertions(+)

Applied!

Thanks,
Jes



^ permalink raw reply

* Re: [md PATCH 0/2] Make it possible to disable create_on_open semantics.
From: Shaohua Li @ 2017-04-12 19:24 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-raid, Coly Li
In-Reply-To: <149197804398.19936.12809382889200123725.stgit@noble>

On Wed, Apr 12, 2017 at 04:26:12PM +1000, Neil Brown wrote:
> Currently, opening an md /dev node will create the array object.
> This makes it hard to destroy the object as udev will typically
> re-open the device node when handling REMOVE events.
> 
> The "new_array" module parameter was created to work towards avoiding
> this problem, and it can be used when
>   CREATE names=yes
> 
> is given in /etc/mdadm.conf.
> However, this doesn't currently support names like "md%d", which lots of
> people use and expect, so we need more work before we can transition
> away from create_on_open.
> 
> These patches add support to "new_array" so that md%d devices
> can be created.  This will make it, once again, possible to have
> md%d devices with numbers > 511. (3.17 made this impossible).
> 
> An enhancement to mdadm that uses this will cause new_array to always
> be used (where available), and we can then disable create_on_open
> completely (after suitable transition periods).

Thanks, applied! The md device creation interface especially create_on_open is
a disaster, hopefully the future sysfs/configfs interface deprecates all of these.

Thanks,
Shaohua

^ permalink raw reply

* [md PATCH] md: handle read-only member devices better.
From: NeilBrown @ 2017-04-12 22:53 UTC (permalink / raw)
  To: Shaohua Li; +Cc: Linux-RAID, Nanda Kishore Chinnaram

[-- Attachment #1: Type: text/plain, Size: 2813 bytes --]


1/ If an array has any read-only devices when it is started,
   the array itself must be read-only
2/ A read-only device cannot be added to an array after it is
   started.
3/ Setting an array to read-write should not succeed
   if any member devices are read-only

Reported-and-Tested-by: Nanda Kishore Chinnaram <Nanda_Kishore_Chinna@dell.com>
Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/md/md.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 22894303d335..9fe930109012 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2093,6 +2093,10 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 	if (find_rdev(mddev, rdev->bdev->bd_dev))
 		return -EEXIST;
 
+	if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
+	    mddev->pers)
+		return -EROFS;
+
 	/* make sure rdev->sectors exceeds mddev->dev_sectors */
 	if (!test_bit(Journal, &rdev->flags) &&
 	    rdev->sectors &&
@@ -5345,6 +5349,13 @@ int md_run(struct mddev *mddev)
 			continue;
 		sync_blockdev(rdev->bdev);
 		invalidate_bdev(rdev->bdev);
+		if (mddev->ro != 1 &&
+		    (bdev_read_only(rdev->bdev) ||
+		     bdev_read_only(rdev->meta_bdev))) {
+			mddev->ro = 1;
+			if (mddev->gendisk)
+				set_disk_ro(mddev->gendisk, 1);
+		}
 
 		/* perform some consistency tests on the device.
 		 * We don't want the data to overlap the metadata,
@@ -5569,6 +5580,9 @@ static int do_md_run(struct mddev *mddev)
 static int restart_array(struct mddev *mddev)
 {
 	struct gendisk *disk = mddev->gendisk;
+	struct md_rdev *rdev;
+	bool has_journal = false;
+	bool has_readonly = false;
 
 	/* Complain if it has no devices */
 	if (list_empty(&mddev->disks))
@@ -5577,24 +5591,21 @@ static int restart_array(struct mddev *mddev)
 		return -EINVAL;
 	if (!mddev->ro)
 		return -EBUSY;
-	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
-		struct md_rdev *rdev;
-		bool has_journal = false;
-
-		rcu_read_lock();
-		rdev_for_each_rcu(rdev, mddev) {
-			if (test_bit(Journal, &rdev->flags) &&
-			    !test_bit(Faulty, &rdev->flags)) {
-				has_journal = true;
-				break;
-			}
-		}
-		rcu_read_unlock();
 
+	rcu_read_lock();
+	rdev_for_each_rcu(rdev, mddev) {
+		if (test_bit(Journal, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags))
+			has_journal = true;
+		if (bdev_read_only(rdev->bdev))
+			has_readonly = true;
+	}
+	rcu_read_unlock();
+	if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
 		/* Don't restart rw with journal missing/faulty */
-		if (!has_journal)
 			return -EINVAL;
-	}
+	if (has_readonly)
+		return -EROFS;
 
 	mddev->safemode = 0;
 	mddev->ro = 0;
-- 
2.12.2


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply related

* [v4 1/2] lib/raid6: Build proper files on corresponding arch
From: Matt Brown @ 2017-04-12 23:15 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: dja, linux-raid

Previously the raid6 test Makefile did not correctly build the files for
testing on PowerPC. This patch fixes the bug, so that all appropriate files
for PowerPC are built.

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
Changelog
v2 - v4
	- fixup whitespace
	- change versioning to match other patch
---
 lib/raid6/test/Makefile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 9c333e9..b64a267 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
         HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
-                         gcc -c -x c - >&/dev/null && \
-                         rm ./-.o && echo yes)
+                         gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
-                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+                CFLAGS += -I../../../arch/powerpc/include
+                CFLAGS += -DCONFIG_ALTIVEC
+                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+                        vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
         endif
 endif
 ifeq ($(ARCH),tilegx)
-- 
2.9.3


^ permalink raw reply related

* [v4 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
From: Matt Brown @ 2017-04-12 23:15 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: dja, linux-raid
In-Reply-To: <20170412231546.15715-1-matthew.brown.dev@gmail.com>

The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

	Performance benchmarks:
		raid6: altivecx4 gen() 18773 MB/s
		raid6: altivecx8 gen() 19438 MB/s

		raid6: vpermxor4 gen() 25112 MB/s
		raid6: vpermxor8 gen() 26279 MB/s

Note: Fixed minor bug in pq.h regarding missing and mismatched ifdef
statements.

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile      |  27 ++++++++++++-
 lib/raid6/algos.c       |   4 ++
 lib/raid6/altivec.uc    |   3 ++
 lib/raid6/test/Makefile |  14 ++++++-
 lib/raid6/vpermxor.uc   | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 154 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..db095a7 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+                              vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec2,
 	&raid6_altivec4,
 	&raid6_altivec8,
+	&raid6_vpermxor1,
+	&raid6_vpermxor2,
+	&raid6_vpermxor4,
+	&raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
 	&raid6_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include <linux/raid/pq.h>
 
+#ifdef CONFIG_ALTIVEC
+
 #include <altivec.h>
 #ifdef __KERNEL__
 # include <asm/cputable.h>
 # include <asm/switch_to.h>
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..9c333e9 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -97,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
 int1.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
 
@@ -122,7 +134,7 @@ tables.c: mktables
 	./mktables > tables.c
 
 clean:
-	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
 	rm -f tilegx*.c
 
 spotless: clean
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 0000000..31a324d
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor opcode to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and the vpermxor opcode.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+					    0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+					    0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+					     0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+					     0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+							void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+	unative_t wp$$, wq$$, wd$$;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	for (d = 0; d < bytes; d += NSIZE*$#) {
+		wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+		for (z = z0-1; z>=0; z--) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			/* P syndrome */
+			wp$$ = vec_xor(wp$$, wd$$);
+
+			/*Q syndrome */
+			asm("vpermxor %0,%1,%2,%3":"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+			wq$$ = vec_xor(wq$$, wd$$);
+		}
+		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
+	}
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	preempt_disable();
+	enable_kernel_altivec();
+
+	raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+	disable_kernel_altivec();
+	preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+	/* Check if CPU has both altivec and the vpermxor instruction*/
+# ifdef __KERNEL__
+	return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+		cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+	return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+	raid6_vpermxor$#_gen_syndrome,
+	NULL,
+	raid6_have_altivec_vpermxor,
+	"vpermxor$#",
+	0
+};
+#endif
-- 
2.9.3


^ permalink raw reply related

* Re: [md PATCH] md: handle read-only member devices better.
From: Shaohua Li @ 2017-04-13  5:47 UTC (permalink / raw)
  To: NeilBrown; +Cc: Linux-RAID, Nanda Kishore Chinnaram
In-Reply-To: <87a87lutj7.fsf@notabene.neil.brown.name>

On Thu, Apr 13, 2017 at 08:53:48AM +1000, Neil Brown wrote:
> 
> 1/ If an array has any read-only devices when it is started,
>    the array itself must be read-only
> 2/ A read-only device cannot be added to an array after it is
>    started.
> 3/ Setting an array to read-write should not succeed
>    if any member devices are read-only

Didn't get these. We call md_import_device() first to open the underlying disk. We
always use FMODE_READ|FMODE_WRITE to open the disk. So if the disk is ro,
md_import_device should fail, and we don't add the disk to the array. Why would we
have such issues?

Thanks,
Shaohua
 
> Reported-and-Tested-by: Nanda Kishore Chinnaram <Nanda_Kishore_Chinna@dell.com>
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
>  drivers/md/md.c | 41 ++++++++++++++++++++++++++---------------
>  1 file changed, 26 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 22894303d335..9fe930109012 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2093,6 +2093,10 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
>  	if (find_rdev(mddev, rdev->bdev->bd_dev))
>  		return -EEXIST;
>  
> +	if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
> +	    mddev->pers)
> +		return -EROFS;
> +
>  	/* make sure rdev->sectors exceeds mddev->dev_sectors */
>  	if (!test_bit(Journal, &rdev->flags) &&
>  	    rdev->sectors &&
> @@ -5345,6 +5349,13 @@ int md_run(struct mddev *mddev)
>  			continue;
>  		sync_blockdev(rdev->bdev);
>  		invalidate_bdev(rdev->bdev);
> +		if (mddev->ro != 1 &&
> +		    (bdev_read_only(rdev->bdev) ||
> +		     bdev_read_only(rdev->meta_bdev))) {
> +			mddev->ro = 1;
> +			if (mddev->gendisk)
> +				set_disk_ro(mddev->gendisk, 1);
> +		}
>  
>  		/* perform some consistency tests on the device.
>  		 * We don't want the data to overlap the metadata,
> @@ -5569,6 +5580,9 @@ static int do_md_run(struct mddev *mddev)
>  static int restart_array(struct mddev *mddev)
>  {
>  	struct gendisk *disk = mddev->gendisk;
> +	struct md_rdev *rdev;
> +	bool has_journal = false;
> +	bool has_readonly = false;
>  
>  	/* Complain if it has no devices */
>  	if (list_empty(&mddev->disks))
> @@ -5577,24 +5591,21 @@ static int restart_array(struct mddev *mddev)
>  		return -EINVAL;
>  	if (!mddev->ro)
>  		return -EBUSY;
> -	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
> -		struct md_rdev *rdev;
> -		bool has_journal = false;
> -
> -		rcu_read_lock();
> -		rdev_for_each_rcu(rdev, mddev) {
> -			if (test_bit(Journal, &rdev->flags) &&
> -			    !test_bit(Faulty, &rdev->flags)) {
> -				has_journal = true;
> -				break;
> -			}
> -		}
> -		rcu_read_unlock();
>  
> +	rcu_read_lock();
> +	rdev_for_each_rcu(rdev, mddev) {
> +		if (test_bit(Journal, &rdev->flags) &&
> +		    !test_bit(Faulty, &rdev->flags))
> +			has_journal = true;
> +		if (bdev_read_only(rdev->bdev))
> +			has_readonly = true;
> +	}
> +	rcu_read_unlock();
> +	if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
>  		/* Don't restart rw with journal missing/faulty */
> -		if (!has_journal)
>  			return -EINVAL;
> -	}
> +	if (has_readonly)
> +		return -EROFS;
>  
>  	mddev->safemode = 0;
>  	mddev->ro = 0;
> -- 
> 2.12.2
> 



^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox