* [PATCH 1/2] lib/raid6: Build proper files on corresponding arch
From: Matt Brown @ 2017-04-12 1:35 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-raid, dja
Previously the raid6 test Makefile did not correctly build the files for
testing on PowerPC. This patch fixes the bug, so that all appropriate files
for PowerPC are built.
Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
lib/raid6/test/Makefile | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 9c333e9..62b26d1 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
ifeq ($(HAS_ALTIVEC),yes)
- OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ CFLAGS += -I../../../arch/powerpc/include
+ CFLAGS += -DCONFIG_ALTIVEC
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
endif
endif
ifeq ($(ARCH),tilegx)
--
2.9.3
^ permalink raw reply related
* [PATCH v3 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
From: Matt Brown @ 2017-04-12 1:35 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-raid, dja
In-Reply-To: <20170412013552.21650-1-matthew.brown.dev@gmail.com>
The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.
The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.
Performance benchmarks:
raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s
raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s
Note: Fixed minor bug in altivec.uc regarding missing and mismatched ifdef
statements.
Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
Changelog
v2
- Change CONFIG_ALTIVEC to CPU_FTR_ALTIVEC_COMP
- Separate bug fix into different patch
---
include/linux/raid/pq.h | 4 ++
lib/raid6/Makefile | 27 ++++++++++++-
lib/raid6/algos.c | 4 ++
lib/raid6/altivec.uc | 3 ++
lib/raid6/test/Makefile | 14 ++++++-
lib/raid6/vpermxor.uc | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 154 insertions(+), 2 deletions(-)
create mode 100644 lib/raid6/vpermxor.uc
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
extern const struct raid6_calls raid6_avx512x4;
extern const struct raid6_calls raid6_tilegx8;
extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..7775aad 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
int8.o int16.o int32.o
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c: UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+ &raid6_vpermxor1,
+ &raid6_vpermxor2,
+ &raid6_vpermxor4,
+ &raid6_vpermxor8,
#endif
#if defined(CONFIG_TILEGX)
&raid6_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
+#endif /* __KERNEL__ */
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..9c333e9 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -97,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
altivec8.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+vpermxor1.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
int1.c: int.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=1 < int.uc > $@
@@ -122,7 +134,7 @@ tables.c: mktables
./mktables > tables.c
clean:
- rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+ rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
rm -f tilegx*.c
spotless: clean
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 0000000..31a324d
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor opcode to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and the vpermxor opcode.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+ 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+ 0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+ 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+ 0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+ void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+ unative_t wp$$, wq$$, wd$$;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for (d = 0; d < bytes; d += NSIZE*$#) {
+ wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+ for (z = z0-1; z>=0; z--) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ /* P syndrome */
+ wp$$ = vec_xor(wp$$, wd$$);
+
+ /*Q syndrome */
+ asm("vpermxor %0,%1,%2,%3":"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+ wq$$ = vec_xor(wq$$, wd$$);
+ }
+ *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ }
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+
+ raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+ /* Check if CPU has both altivec and the vpermxor instruction*/
+# ifdef __KERNEL__
+ return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+ return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+ raid6_vpermxor$#_gen_syndrome,
+ NULL,
+ raid6_have_altivec_vpermxor,
+ "vpermxor$#",
+ 0
+};
+#endif
--
2.9.3
^ permalink raw reply related
* Re: [md PATCH 00/10] Simplify bio splitting and related code.
From: Shaohua Li @ 2017-04-12 2:51 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-raid
In-Reply-To: <871ssywmno.fsf@notabene.neil.brown.name>
On Wed, Apr 12, 2017 at 09:27:07AM +1000, Neil Brown wrote:
> On Tue, Apr 11 2017, Shaohua Li wrote:
>
> > On Wed, Apr 05, 2017 at 02:05:50PM +1000, Neil Brown wrote:
> >> This is part of my little project to make bio splitting
> >> in Linux uniform and dead-lock free, in a way that will mean
> >> that we can get rid of all the bioset threads.
> >>
> >> The basic approach is that when a bio needs to be split, we call
> >> bio_split(), bio_chain() and then generic_make_request().
> >> We then proceed to handle the remainder without further splitting.
> >> Recent changes to generic_make_request() ensure that this will
> >> be safe from deadlocks, providing each bioset is used only once
> >> in the stack.
> >>
> >> This leads to simpler code in various places. In particular, the
> >> splitting of bios that is needed to work around known bad blocks
> >> is now much less complex. There is only ever one r1bio per bio.
> >>
> >> As you can see from
> >> 10 files changed, 335 insertions(+), 540 deletions(-)
> >> there is a net reduction in code.
> >
> > Looks good and makes code simpler, applied, thanks Neil! The patch 1 and 6 need
> > comments in the code to explain how deadlock is avoided though. Care to send a
> > new patch?
>
> It isn't clear to me what sort of comment you want, or where it should
> go.
> It might make sense to have a comment near bio_split() explaining how to
> use it (i.e. explaining the pattern used in various patches here), but
> I don't see what sort of comments would help in raid1.c or raid10.c
> ??
Both raid1.c and raid10.c previously had comments explaining why we need to
offload the bio to raid1d/raid10d to avoid deadlock, and comments explaining
why we do bio_split() and then generic_make_request(). That information is now
lost, so I hope we can add back an explanation of why the new way (bio_split()
followed by generic_make_request() of the next part) avoids deadlock. That will
be very helpful for others.
Thanks,
Shaohua
^ permalink raw reply
* [md PATCH 0/2] Make it possible to disable create_on_open semantics.
From: NeilBrown @ 2017-04-12 6:26 UTC (permalink / raw)
To: Shaohua Li; +Cc: linux-raid, Coly Li
Currently, opening an md /dev node will create the array object.
This makes it hard to destroy the object as udev will typically
re-open the device node when handling REMOVE events.
The "new_array" module parameter was created to work towards avoiding
this problem, and it can be used when
CREATE names=yes
is given in /etc/mdadm.conf.
However, this doesn't currently support names like "md%d", which lots of
people use and expect, so we need more work before we can transition
away from create_on_open.
These patches add support to "new_array" so that md%d devices
can be created. This will make it, once again, possible to have
md%d devices with numbers > 511. (3.17 made this impossible).
An enhancement to mdadm that uses this will cause new_array to always
be used (where available), and we can then disable create_on_open
completely (after suitable transition periods).
NeilBrown
---
NeilBrown (2):
md: allow creation of mdNNN arrays via md_mod/parameters/new_array
md: support disabling of create-on-open semantics.
drivers/md/md.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 41 insertions(+), 7 deletions(-)
--
Signature
^ permalink raw reply
* [md PATCH 1/2] md: allow creation of mdNNN arrays via md_mod/parameters/new_array
From: NeilBrown @ 2017-04-12 6:26 UTC (permalink / raw)
To: Shaohua Li; +Cc: linux-raid, Coly Li
In-Reply-To: <149197804398.19936.12809382889200123725.stgit@noble>
The intention when creating the "new_array" parameter and the
possibility of having array names like "md_HOME" was to transition
away from the old way of creating arrays and to eventually only use
this new way.
The "old" way of creating an array is to create a device node in /dev
and then open it. The act of opening creates the array.
This is problematic because sometimes the device node can be opened
when we don't want to create an array. This can easily happen
when some rule triggered by udev looks at a device as it is being
destroyed. The node in /dev continues to exist for a short period
after an array is stopped, and opening it during this time recreates
the array (as an inactive array).
Unfortunately no clear plan for the transition was created. It is now
time to fix that.
This patch allows devices with numeric names, like "md999" to be
created by writing to "new_array". This will only work if the minor
number given is not already in use. This will allow mdadm to
support the creation of arrays with numbers > 511 (currently not
possible) by writing to new_array.
mdadm can, at some point, use this approach to create *all* arrays,
which will allow the transition to only using the new-way.
Signed-off-by: NeilBrown <neilb@suse.com>
---
drivers/md/md.c | 34 ++++++++++++++++++++++++++++------
1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9fe930109012..c3d3bae947a1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5164,6 +5164,14 @@ static void no_op(struct percpu_ref *r) {}
static int md_alloc(dev_t dev, char *name)
{
+ /* If dev is zero, name is the name of a device to allocate with
+ * an arbitrary minor number. It will be "md_???"
+ * If dev is non-zero it must be a device number with a MAJOR of
+ * MD_MAJOR or mdp_major. In this case, if "name" is NULL, then
+ * the device is being created by opening a node in /dev.
+ * If "name" is not NULL, the device is being created by
+ * writing to /sys/module/md_mod/parameters/new_array.
+ */
static DEFINE_MUTEX(disks_mutex);
struct mddev *mddev = mddev_find(dev);
struct gendisk *disk;
@@ -5189,7 +5197,7 @@ static int md_alloc(dev_t dev, char *name)
if (mddev->gendisk)
goto abort;
- if (name) {
+ if (name && !dev) {
/* Need to ensure that 'name' is not a duplicate.
*/
struct mddev *mddev2;
@@ -5203,6 +5211,11 @@ static int md_alloc(dev_t dev, char *name)
}
spin_unlock(&all_mddevs_lock);
}
+ if (name && dev)
+ /*
+ * Creating /dev/mdNNN via "newarray", so adjust hold_active.
+ */
+ mddev->hold_active = UNTIL_STOP;
error = -ENOMEM;
mddev->queue = blk_alloc_queue(GFP_KERNEL);
@@ -5279,21 +5292,30 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
static int add_named_array(const char *val, struct kernel_param *kp)
{
- /* val must be "md_*" where * is not all digits.
- * We allocate an array with a large free minor number, and
+ /* val must be "md_*" or "mdNNN".
+ * For "md_*" we allocate an array with a large free minor number, and
* set the name to val. val must not already be an active name.
+ * For "mdNNN" we allocate an array with the minor number NNN
+ * which must not already be in use.
*/
int len = strlen(val);
char buf[DISK_NAME_LEN];
+ unsigned long devnum;
while (len && val[len-1] == '\n')
len--;
if (len >= DISK_NAME_LEN)
return -E2BIG;
strlcpy(buf, val, len+1);
- if (strncmp(buf, "md_", 3) != 0)
- return -EINVAL;
- return md_alloc(0, buf);
+ if (strncmp(buf, "md_", 3) == 0)
+ return md_alloc(0, buf);
+ if (strncmp(buf, "md", 2) == 0 &&
+ isdigit(buf[2]) &&
+ kstrtoul(buf+2, 10, &devnum) == 0 &&
+ devnum <= MINORMASK)
+ return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
+
+ return -EINVAL;
}
static void md_safemode_timeout(unsigned long data)
^ permalink raw reply related
* [md PATCH 2/2] md: support disabling of create-on-open semantics.
From: NeilBrown @ 2017-04-12 6:26 UTC (permalink / raw)
To: Shaohua Li; +Cc: linux-raid, Coly Li
In-Reply-To: <149197804398.19936.12809382889200123725.stgit@noble>
md allows a new array device to be created by simply
opening a device file. This makes it difficult to
remove the device and udev is likely to open the device file
as part of processing the REMOVE event.
There is an alternate mechanism for creating arrays
by writing to the new_array module parameter.
When using tools that work with this parameter, it is
best to disable the old semantics.
This new module parameter allows that.
Signed-off-by: NeilBrown <neilb@suse.com>
---
drivers/md/md.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c3d3bae947a1..a7ab769eacc3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -174,6 +174,16 @@ static const struct block_device_operations md_fops;
static int start_readonly;
+/*
+ * The original mechanism for creating an md device is to create
+ * a device node in /dev and to open it. This causes races with device-close.
+ * The preferred method is to write to the "new_array" module parameter.
+ * This can avoid races.
+ * Setting create_on_open to false disables the original mechanism
+ * so all the races disappear.
+ */
+static bool create_on_open = true;
+
/* bio_clone_mddev
* like bio_clone_bioset, but with a local bio set
*/
@@ -5286,7 +5296,8 @@ static int md_alloc(dev_t dev, char *name)
static struct kobject *md_probe(dev_t dev, int *part, void *data)
{
- md_alloc(dev, NULL);
+ if (create_on_open)
+ md_alloc(dev, NULL);
return NULL;
}
@@ -9202,6 +9213,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
+module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
^ permalink raw reply related
* Re: [PATCH 1/2] lib/raid6: Build proper files on corresponding arch
From: Michael Ellerman @ 2017-04-12 7:01 UTC (permalink / raw)
To: Matt Brown, linuxppc-dev; +Cc: linux-raid, dja
In-Reply-To: <20170412013552.21650-1-matthew.brown.dev@gmail.com>
Matt Brown <matthew.brown.dev@gmail.com> writes:
> diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
> index 9c333e9..62b26d1 100644
> --- a/lib/raid6/test/Makefile
> +++ b/lib/raid6/test/Makefile
> @@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
> CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
> else
> HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
> - gcc -c -x c - >&/dev/null && \
> - rm ./-.o && echo yes)
> + gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
> ifeq ($(HAS_ALTIVEC),yes)
> - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
> + CFLAGS += -I../../../arch/powerpc/include
> + CFLAGS += -DCONFIG_ALTIVEC
> + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
> + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
The whitespace in here is a bit of a mess, but you should follow what's
there and use spaces to indent your additions.
cheers
^ permalink raw reply
* [PATCH v1 1/2] mdadm/manpage:update manpage for readonly parameter
From: Zhilong Liu @ 2017-04-12 8:36 UTC (permalink / raw)
To: Jes.Sorensen; +Cc: linux-raid, Zhilong Liu
In-Reply-To: <6afe1397-3063-c5d7-58ba-f3dbdfa05336@gmail.com>
update readonly in manpage:
Currently both readwrite and readonly work well,
update the readonly section.
One commit in linux/driver/md. Cleared "MD_CLOSING bit" to
Fixes: af8d8e6f0315 ("md: changes for MD_STILL_CLOSED flag")
Signed-off-by: Zhilong Liu <zlliu@suse.com>
---
mdadm.8.in | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index 744c12b..f10a8b8 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -925,7 +925,8 @@ will not try to be so clever.
Start the array
.B read only
rather than read-write as normal. No writes will be allowed to the
-array, and no resync, recovery, or reshape will be started.
+array, and no resync, recovery, or reshape will be started. It works with
+Create, Assemble, Manage and Misc mode.
.TP
.BR \-a ", " "\-\-auto{=yes,md,mdp,part,p}{NN}"
@@ -2232,7 +2233,7 @@ be in use.
.TP
.B \-\-readonly
-start the array readonly \(em not supported yet.
+start the array in readonly mode.
.SH MANAGE MODE
.HP 12
--
2.6.6
^ permalink raw reply related
* [PATCH v1 2/2] mdadm/manpage:clustered arrays don't support array-size yet
From: Zhilong Liu @ 2017-04-12 8:37 UTC (permalink / raw)
To: Jes.Sorensen; +Cc: linux-raid, Zhilong Liu
In-Reply-To: <1d2103a9-a7f2-54f8-ec51-ad0a04c6d9ae@gmail.com>
Update manpage for array-size section:
Clustered arrays don't support the --array-size yet.
Signed-off-by: Zhilong Liu <zlliu@suse.com>
---
mdadm.8.in | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mdadm.8.in b/mdadm.8.in
index f10a8b8..fb99a5c 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -541,6 +541,8 @@ A value of
restores the apparent size of the array to be whatever the real
amount of available space is.
+Clustered arrays do not support this parameter yet.
+
.TP
.BR \-c ", " \-\-chunk=
Specify chunk size of kilobytes. The default when creating an
--
2.6.6
^ permalink raw reply related
* Re: [PATCH 1/2] lib/raid6: Build proper files on corresponding arch
From: Daniel Axtens @ 2017-04-12 9:27 UTC (permalink / raw)
To: Michael Ellerman, Matt Brown, linuxppc-dev; +Cc: linux-raid
In-Reply-To: <87wpaqt8ia.fsf@concordia.ellerman.id.au>
Michael Ellerman <mpe@ellerman.id.au> writes:
> Matt Brown <matthew.brown.dev@gmail.com> writes:
>
>> diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
>> index 9c333e9..62b26d1 100644
>> --- a/lib/raid6/test/Makefile
>> +++ b/lib/raid6/test/Makefile
>> @@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
>> CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
>> else
>> HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
>> - gcc -c -x c - >&/dev/null && \
>> - rm ./-.o && echo yes)
>> + gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
>> ifeq ($(HAS_ALTIVEC),yes)
>> - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
>> + CFLAGS += -I../../../arch/powerpc/include
>> + CFLAGS += -DCONFIG_ALTIVEC
>> + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
>> + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
>
> The whitespace in here is a bit of a mess, but you should follow what's
> there and use spaces to indent your additions.
My apologies for steering you in the wrong direction here Matt!
Also, should the changes to altivec.uc in patch 2 be part of this patch?
From memory they are also needed to run the tests?
Regards,
Daniel
>
> cheers
^ permalink raw reply
* Linux software raid troubles
From: linuxknight @ 2017-04-12 14:06 UTC (permalink / raw)
To: linux-raid
Last weekend I was moving a server with a raid1 configuration,
controlled by an Intel Corporation 82801 SATA RAID Controller. Upon
reboot I noticed the degraded message (server hadn't been rebooted in a
couple years).
The raid1 array was two 500gb black WD drives. I wasnt able to locate
an identical 500gb disk, but did find a 2TB just to get things
mirrored again. The bios screen accepted the replacement disk and
said it would rebuild in the OS. mdsync seemed to do its thing but I
noticed mdmon process was taking 200% cpu. I let it go a few days
thinking it was just taking longer than normal to sync, then rebooted.
It was in a complete failed state and wouldnt boot at all. After
removing the 2TB disk I was able to boot into the OS again. I just
assumed I needed a similar drive size for the second part of the
mirror.
Today I installed an identical black WD 500gb drive and it's showing the
same behavior. Currently running a bad block check but in the
meantime I found the wiki and read up a bit on some basic
troubleshooting and asking for help
(https://raid.wiki.kernel.org/index.php/Asking_for_help)
I wanted to attach the output of the commands on that page and hope
someone may have some ideas for rebuilding this second drive. Thank
you in advance for any suggestions. Im concerned at this point I only
have one good drive and could possibly lose everything if that failed.
mail:~ # smartctl --xall /dev/sda
smartctl 6.0 2012-10-10 r3643 [i686-linux-3.1.10-1.29-pae] (SUSE RPM)
Copyright (C) 2002-12, Bruce Allen, Christian Franke, www.smartmontools.org
=== START OF INFORMATION SECTION ===
Model Family: Western Digital Caviar Black
Device Model: WDC WD5002AALX-00J37A0
Serial Number: WD-WMAYUL169523
LU WWN Device Id: 5 0014ee 104a23be3
Firmware Version: 15.01H15
User Capacity: 500,107,862,016 bytes [500 GB]
Sector Size: 512 bytes logical/physical
Device is: In smartctl database [for details use: -P show]
ATA Version is: ATA8-ACS (minor revision not indicated)
SATA Version is: SATA 3.0, 6.0 Gb/s (current: 3.0 Gb/s)
Local Time is: Wed Apr 12 09:33:54 2017 EDT
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
AAM feature is: Unavailable
APM feature is: Unavailable
Rd look-ahead is: Enabled
Write cache is: Enabled
ATA Security is: Disabled, frozen [SEC2]
=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
General SMART Values:
Offline data collection status: (0x82) Offline data collection activity
was completed without error.
Auto Offline Data Collection: Enabled.
Self-test execution status: ( 0) The previous self-test routine completed
without error or no self-test has ever
been run.
Total time to complete Offline
data collection: ( 8280) seconds.
Offline data collection
capabilities: (0x7b) SMART execute Offline immediate.
Auto Offline data collection
on/off support.
Suspend Offline collection upon new
command.
Offline surface scan supported.
Self-test supported.
Conveyance Self-test supported.
Selective Self-test supported.
SMART capabilities: (0x0003) Saves SMART data before entering
power-saving mode.
Supports SMART auto save timer.
Error logging capability: (0x01) Error logging supported.
General Purpose Logging supported.
Short self-test routine
recommended polling time: ( 2) minutes.
Extended self-test routine
recommended polling time: ( 84) minutes.
Conveyance self-test routine
recommended polling time: ( 5) minutes.
SCT capabilities: (0x3037) SCT Status supported.
SCT Feature Control supported.
SCT Data Table supported.
SMART Attributes Data Structure revision number: 16
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
1 Raw_Read_Error_Rate POSR-K 200 200 051 - 273
3 Spin_Up_Time POS--K 144 144 021 - 3783
4 Start_Stop_Count -O--CK 100 100 000 - 42
5 Reallocated_Sector_Ct PO--CK 200 200 140 - 0
7 Seek_Error_Rate -OSR-K 200 200 000 - 0
9 Power_On_Hours -O--CK 046 046 000 - 39646
10 Spin_Retry_Count -O--CK 100 253 000 - 0
11 Calibration_Retry_Count -O--CK 100 253 000 - 0
12 Power_Cycle_Count -O--CK 100 100 000 - 39
192 Power-Off_Retract_Count -O--CK 200 200 000 - 36
193 Load_Cycle_Count -O--CK 200 200 000 - 5
194 Temperature_Celsius -O---K 104 104 000 - 39
196 Reallocated_Event_Count -O--CK 200 200 000 - 0
197 Current_Pending_Sector -O--CK 200 200 000 - 9
198 Offline_Uncorrectable ----CK 200 200 000 - 7
199 UDMA_CRC_Error_Count -O--CK 200 200 000 - 0
200 Multi_Zone_Error_Rate ---R-- 200 200 000 - 15
||||||_ K auto-keep
|||||__ C event count
||||___ R error rate
|||____ S speed/performance
||_____ O updated online
|______ P prefailure warning
General Purpose Log Directory Version 1
SMART Log Directory Version 1 [multi-sector log support]
GP/S Log at address 0x00 has 1 sectors [Log Directory]
SMART Log at address 0x01 has 1 sectors [Summary SMART error log]
SMART Log at address 0x02 has 5 sectors [Comprehensive SMART error log]
GP Log at address 0x03 has 6 sectors [Ext. Comprehensive SMART error log]
SMART Log at address 0x06 has 1 sectors [SMART self-test log]
GP Log at address 0x07 has 1 sectors [Extended self-test log]
SMART Log at address 0x09 has 1 sectors [Selective self-test log]
GP Log at address 0x10 has 1 sectors [NCQ Command Error log]
GP Log at address 0x11 has 1 sectors [SATA Phy Event Counters]
GP/S Log at address 0x80 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x81 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x82 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x83 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x84 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x85 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x86 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x87 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x88 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x89 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8a has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8b has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8c has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8d has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8e has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8f has 16 sectors [Host vendor specific log]
GP/S Log at address 0x90 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x91 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x92 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x93 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x94 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x95 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x96 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x97 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x98 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x99 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9a has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9b has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9c has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9d has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9e has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9f has 16 sectors [Host vendor specific log]
GP/S Log at address 0xa0 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa1 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa2 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa3 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa4 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa5 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa6 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa7 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa8 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xa9 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xaa has 1 sectors [Device vendor specific log]
GP/S Log at address 0xab has 1 sectors [Device vendor specific log]
GP/S Log at address 0xac has 1 sectors [Device vendor specific log]
GP/S Log at address 0xad has 1 sectors [Device vendor specific log]
GP/S Log at address 0xae has 1 sectors [Device vendor specific log]
GP/S Log at address 0xaf has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb0 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb1 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb2 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb3 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb4 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb5 has 1 sectors [Device vendor specific log]
GP Log at address 0xb6 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb7 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xbd has 1 sectors [Device vendor specific log]
GP/S Log at address 0xc0 has 1 sectors [Device vendor specific log]
GP Log at address 0xc1 has 24 sectors [Device vendor specific log]
GP/S Log at address 0xe0 has 1 sectors [SCT Command/Status]
GP/S Log at address 0xe1 has 1 sectors [SCT Data Transfer]
SMART Extended Comprehensive Error Log Version: 1 (6 sectors)
Device Error Count: 209 (device log contains only the most recent 24 errors)
CR = Command Register
FEATR = Features Register
COUNT = Count (was: Sector Count) Register
LBA_48 = Upper bytes of LBA High/Mid/Low Registers ] ATA-8
LH = LBA High (was: Cylinder High) Register ] LBA
LM = LBA Mid (was: Cylinder Low) Register ] Register
LL = LBA Low (was: Sector Number) Register ]
DV = Device (was: Device/Head) Register
DC = Device Control Register
ER = Error register
ST = Status register
Powered_Up_Time is measured from power on, and printed as
DDd+hh:mm:SS.sss where DD=days, hh=hours, mm=minutes,
SS=sec, and sss=millisec. It "wraps" after 49.710 days.
Error 209 [16] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:05.460 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:05.460 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:05.460 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:05.457 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:05.457 SET
FEATURES [Set tra
nsfer mode]
Error 208 [15] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:03.702 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:03.702 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:03.702 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:03.701 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:03.701 SET
FEATURES [Set tra
nsfer mode]
Error 207 [14] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:01.947 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:01.947 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:01.947 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:01.944 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:01.944 SET
FEATURES [Set tra
nsfer mode]
Error 206 [13] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:00.189 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:00.189 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:00.189 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:00.188 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:00.188 SET
FEATURES [Set tra
nsfer mode]
Error 205 [12] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:58.434 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:58.434 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:58.434 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:58.431 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:10:58.431 SET
FEATURES [Set tra
nsfer mode]
Error 204 [11] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:56.681 READ FPDMA QUEUED
ea 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:56.660 FLUSH CACHE EXT
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:56.659 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:56.659 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:56.658 IDENTIFY DEVICE
Error 203 [10] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:54.903 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:54.903 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:54.903 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:54.901 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:10:54.901 SET
FEATURES [Set tra
nsfer mode]
Error 202 [9] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 ho
urs)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:53.148 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:53.147 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:53.146 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:53.145 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:10:53.145 SET
FEATURES [Set tra
nsfer mode]
SMART Extended Self-test Log Version: 1 (1 sectors)
No self-tests have been logged. [To run self-tests, use: smartctl -t]
SMART Selective self-test log data structure revision number 1
SPAN MIN_LBA MAX_LBA CURRENT_TEST_STATUS
1 0 0 Not_testing
2 0 0 Not_testing
3 0 0 Not_testing
4 0 0 Not_testing
5 0 0 Not_testing
Selective self-test flags (0x0):
After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.
SCT Status Version: 3
SCT Version (vendor specific): 258 (0x0102)
SCT Support Level: 1
Device State: Active (0)
Current Temperature: 39 Celsius
Power Cycle Min/Max Temperature: 30/39 Celsius
Lifetime Min/Max Temperature: 0/39 Celsius
Under/Over Temperature Limit Count: 0/0
SCT Temperature History Version: 2
Temperature Sampling Period: 1 minute
Temperature Logging Interval: 1 minute
Min/Max recommended Temperature: 0/60 Celsius
Min/Max Temperature Limit: -41/85 Celsius
Temperature History Size (Index): 478 (368)
Index Estimated Time Temperature Celsius
369 2017-04-12 01:36 29 **********
... ..(198 skipped). .. **********
90 2017-04-12 04:55 29 **********
91 2017-04-12 04:56 28 *********
92 2017-04-12 04:57 29 **********
... ..( 71 skipped). .. **********
164 2017-04-12 06:09 29 **********
165 2017-04-12 06:10 ? -
166 2017-04-12 06:11 30 ***********
167 2017-04-12 06:12 30 ***********
168 2017-04-12 06:13 30 ***********
169 2017-04-12 06:14 31 ************
170 2017-04-12 06:15 32 *************
... ..( 3 skipped). .. *************
174 2017-04-12 06:19 32 *************
175 2017-04-12 06:20 33 **************
176 2017-04-12 06:21 33 **************
177 2017-04-12 06:22 33 **************
178 2017-04-12 06:23 34 ***************
179 2017-04-12 06:24 34 ***************
180 2017-04-12 06:25 35 ****************
... ..( 8 skipped). .. ****************
189 2017-04-12 06:34 35 ****************
190 2017-04-12 06:35 36 *****************
... ..( 23 skipped). .. *****************
214 2017-04-12 06:59 36 *****************
215 2017-04-12 07:00 37 ******************
... ..( 4 skipped). .. ******************
220 2017-04-12 07:05 37 ******************
221 2017-04-12 07:06 38 *******************
222 2017-04-12 07:07 37 ******************
223 2017-04-12 07:08 38 *******************
... ..( 6 skipped). .. *******************
230 2017-04-12 07:15 38 *******************
231 2017-04-12 07:16 37 ******************
232 2017-04-12 07:17 38 *******************
... ..( 14 skipped). .. *******************
247 2017-04-12 07:32 38 *******************
248 2017-04-12 07:33 39 ********************
249 2017-04-12 07:34 39 ********************
250 2017-04-12 07:35 38 *******************
251 2017-04-12 07:36 39 ********************
... ..( 4 skipped). .. ********************
256 2017-04-12 07:41 39 ********************
257 2017-04-12 07:42 29 **********
... ..(110 skipped). .. **********
368 2017-04-12 09:33 29 **********
SCT Error Recovery Control command not supported
Device Statistics (GP Log 0x04) not supported
SATA Phy Event Counters (GP Log 0x11)
ID Size Value Description
0x0001 2 0 Command failed due to ICRC error
0x0002 2 0 R_ERR response for data FIS
0x0003 2 0 R_ERR response for device-to-host data FIS
0x0004 2 0 R_ERR response for host-to-device data FIS
0x0005 2 0 R_ERR response for non-data FIS
0x0006 2 0 R_ERR response for device-to-host non-data FIS
0x0007 2 0 R_ERR response for host-to-device non-data FIS
0x000a 2 7 Device-to-host register FISes sent due to a COMRESET
0x000b 2 0 CRC errors within host-to-device FIS
0x8000 4 5831 Vendor specific
mail:~ # mdadm --examine /dev/sda
/dev/sda:
Magic : Intel Raid ISM Cfg Sig.
Version : 1.1.00
Orig Family : 80d98105
Family : 68a98654
Generation : 00b83763
Attributes : All supported
UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
Checksum : 7e0e85bb correct
MPB Sectors : 2
Disks : 3
RAID Devices : 1
[Volume0]:
UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
RAID Level : 1
Members : 2
Slots : [__]
Failed disk : 1
This Slot : ?
Array Size : 976766976 (465.76 GiB 500.10 GB)
Per Dev Size : 976767240 (465.76 GiB 500.10 GB)
Sector Offset : 0
Num Stripes : 3815496
Chunk Size : 64 KiB
Reserved : 0
Migrate State : idle
Map State : failed
Dirty State : dirty
Disk00 Serial : WD-WMAYUL169523
State : active failed
Id : 00040000
Usable Size : 976766862 (465.76 GiB 500.10 GB)
Disk01 Serial : WD-WCC6Y1VENZK4
State : active failed
Id : 00050000
Usable Size : 976766862 (465.76 GiB 500.10 GB)
Disk02 Serial : Z4Z6V3CV:0
State : active failed
Id : ffffffff
Usable Size : 3907022862 (1863.01 GiB 2000.40 GB)
Disk Serial : WD-WMAYUL169523
State : active failed
Id : 00040000
Usable Size : 976766862 (465.76 GiB 500.10 GB)
mail:~ # mdadm --detail /dev/sda
mdadm: /dev/sda does not appear to be an md device
mail:~ # mdadm --detail /dev/md126
md126 md126p1 md126p2
mail:~ # mdadm --detail /dev/md126
md126 md126p1 md126p2
mail:~ # mdadm --detail /dev/md126
/dev/md126:
Container : /dev/md127, member 0
Raid Level : raid1
Array Size : 488383488 (465.76 GiB 500.10 GB)
Used Dev Size : 488383620 (465.76 GiB 500.10 GB)
Raid Devices : 2
Total Devices : 1
State : clean, degraded
Active Devices : 1
Working Devices : 1
Failed Devices : 0
Spare Devices : 0
UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
Number Major Minor RaidDevice State
1 8 0 0 active sync /dev/sda
1 0 0 1 removed
mail:~ # mdadm --detail /dev/md127
/dev/md127:
Version : imsm
Raid Level : container
Total Devices : 2
Working Devices : 2
UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
Member Arrays : /dev/md126
Number Major Minor RaidDevice
0 8 16 - /dev/sdb
1 8 0 - /dev/sda
mail:~/lsdrv # ./lsdrv
PCI [ahci] 00:1f.2 RAID bus controller: Intel Corporation 82801 SATA
RAID Controller (rev 05)
├scsi 0:0:0:0 ATAPI iHAS424 B {3524253_2N8147500192}
│└sr0 1.00g [11:0] Empty/Unknown
├scsi 1:x:x:x [Empty]
├scsi 2:x:x:x [Empty]
├scsi 3:x:x:x [Empty]
├scsi 4:0:0:0 ATA WDC WD5002AALX-0 {WD-WMAYUL169523}
│└sda 465.76g [8:0] isw_raid_member
│ ├md126 465.76g [9:126] MD vexternal:/md127/0 raid1 (2) active
DEGRADED, 64k Chunk, recover (none) none
{44c0fda9:b2d38c01:e48120f6:4bed6635}
│ ││ Partitioned (dos)
│ │├md126p1 4.01g [259:0] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ │└md126p2 461.75g [259:1] Partitioned (dos)
{bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
│ │ └Mounted as /dev/md126p2 @ /
│ ├md127 0.00k [9:127] MD vexternal:imsm () inactive, None (None)
None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
│ │ Empty/Unknown
│ ├sda1 4.01g [8:1] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ └sda2 461.75g [8:2] Partitioned (dos) {bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
└scsi 5:0:0:0 ATA WDC WD5003AZEX-0 {WD-WCC6Y1VENZK4}
└sdb 465.76g [8:16] isw_raid_member
└md127 0.00k [9:127] MD vexternal:imsm () inactive, None (None)
None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
Empty/Unknown
PCI [sata_sil24] 04:00.0 RAID bus controller: Silicon Image, Inc. SiI
3124 PCI-X Serial ATA Controller (rev 02)
├scsi 6:x:x:x [Empty]
├scsi 7:x:x:x [Empty]
├scsi 8:x:x:x [Empty]
└scsi 9:x:x:x [Empty]
mail:~/lsdrv # cat /proc/mdstat
Personalities : [raid1] [raid0] [raid10] [raid6] [raid5] [raid4]
md126 : active raid1 sda[1]
488383488 blocks super external:/md127/0 [2/1] [U_]
md127 : inactive sda[1](S) sdb[0](S)
5928 blocks super external:imsm
unused devices: <none>
mail:~/lsdrv #
^ permalink raw reply
* Linux software raid troubles
From: linuxknight @ 2017-04-12 14:31 UTC (permalink / raw)
To: linux-raid
Last weekend I was moving a server with a raid1 configuration,
controlled by an Intel Corporation 82801 SATA RAID Controller. Upon
reboot I noticed the degraded message (the server hadn't been rebooted in a
couple of years).
The raid1 array was two 500GB black WD drives. I wasn't able to locate
an identical 500GB disk, but did find a 2TB one just to get things
mirrored again. The BIOS screen accepted the replacement disk and
said it would rebuild in the OS. mdsync seemed to do its thing but I
noticed the mdmon process was taking 200% CPU. I let it go a few days
thinking it was just taking longer than normal to sync, then rebooted.
It was in a complete failed state and wouldn't boot at all. After
removing the 2TB disk I was able to boot into the OS again. I just
assumed I needed a similar drive size for the second part of the
mirror.
Today I installed an identical black WD 500GB drive and it's showing the
same behavior. I am currently running a bad block check, but in the
meantime I found the wiki and read up a bit on some basic
troubleshooting and asking for help
(https://raid.wiki.kernel.org/index.php/Asking_for_help).
I wanted to attach the output of the commands on that page and hope
someone may have some ideas for rebuilding this second drive. Thank
you in advance for any suggestions. I'm concerned that at this point I only
have one good drive and could possibly lose everything if that failed.
mail:~ # smartctl --xall /dev/sda
smartctl 6.0 2012-10-10 r3643 [i686-linux-3.1.10-1.29-pae] (SUSE RPM)
Copyright (C) 2002-12, Bruce Allen, Christian Franke, www.smartmontools.org
=== START OF INFORMATION SECTION ===
Model Family: Western Digital Caviar Black
Device Model: WDC WD5002AALX-00J37A0
Serial Number: WD-WMAYUL169523
LU WWN Device Id: 5 0014ee 104a23be3
Firmware Version: 15.01H15
User Capacity: 500,107,862,016 bytes [500 GB]
Sector Size: 512 bytes logical/physical
Device is: In smartctl database [for details use: -P show]
ATA Version is: ATA8-ACS (minor revision not indicated)
SATA Version is: SATA 3.0, 6.0 Gb/s (current: 3.0 Gb/s)
Local Time is: Wed Apr 12 09:33:54 2017 EDT
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
AAM feature is: Unavailable
APM feature is: Unavailable
Rd look-ahead is: Enabled
Write cache is: Enabled
ATA Security is: Disabled, frozen [SEC2]
=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
General SMART Values:
Offline data collection status: (0x82) Offline data collection activity
was completed without error.
Auto Offline Data Collection: Enabled.
Self-test execution status: ( 0) The previous self-test routine completed
without error or no self-test has ever
been run.
Total time to complete Offline
data collection: ( 8280) seconds.
Offline data collection
capabilities: (0x7b) SMART execute Offline immediate.
Auto Offline data collection
on/off support.
Suspend Offline collection upon new
command.
Offline surface scan supported.
Self-test supported.
Conveyance Self-test supported.
Selective Self-test supported.
SMART capabilities: (0x0003) Saves SMART data before entering
power-saving mode.
Supports SMART auto save timer.
Error logging capability: (0x01) Error logging supported.
General Purpose Logging supported.
Short self-test routine
recommended polling time: ( 2) minutes.
Extended self-test routine
recommended polling time: ( 84) minutes.
Conveyance self-test routine
recommended polling time: ( 5) minutes.
SCT capabilities: (0x3037) SCT Status supported.
SCT Feature Control supported.
SCT Data Table supported.
SMART Attributes Data Structure revision number: 16
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
1 Raw_Read_Error_Rate POSR-K 200 200 051 - 273
3 Spin_Up_Time POS--K 144 144 021 - 3783
4 Start_Stop_Count -O--CK 100 100 000 - 42
5 Reallocated_Sector_Ct PO--CK 200 200 140 - 0
7 Seek_Error_Rate -OSR-K 200 200 000 - 0
9 Power_On_Hours -O--CK 046 046 000 - 39646
10 Spin_Retry_Count -O--CK 100 253 000 - 0
11 Calibration_Retry_Count -O--CK 100 253 000 - 0
12 Power_Cycle_Count -O--CK 100 100 000 - 39
192 Power-Off_Retract_Count -O--CK 200 200 000 - 36
193 Load_Cycle_Count -O--CK 200 200 000 - 5
194 Temperature_Celsius -O---K 104 104 000 - 39
196 Reallocated_Event_Count -O--CK 200 200 000 - 0
197 Current_Pending_Sector -O--CK 200 200 000 - 9
198 Offline_Uncorrectable ----CK 200 200 000 - 7
199 UDMA_CRC_Error_Count -O--CK 200 200 000 - 0
200 Multi_Zone_Error_Rate ---R-- 200 200 000 - 15
||||||_ K auto-keep
|||||__ C event count
||||___ R error rate
|||____ S speed/performance
||_____ O updated online
|______ P prefailure warning
General Purpose Log Directory Version 1
SMART Log Directory Version 1 [multi-sector log support]
GP/S Log at address 0x00 has 1 sectors [Log Directory]
SMART Log at address 0x01 has 1 sectors [Summary SMART error log]
SMART Log at address 0x02 has 5 sectors [Comprehensive SMART error log]
GP Log at address 0x03 has 6 sectors [Ext. Comprehensive SMART error log]
SMART Log at address 0x06 has 1 sectors [SMART self-test log]
GP Log at address 0x07 has 1 sectors [Extended self-test log]
SMART Log at address 0x09 has 1 sectors [Selective self-test log]
GP Log at address 0x10 has 1 sectors [NCQ Command Error log]
GP Log at address 0x11 has 1 sectors [SATA Phy Event Counters]
GP/S Log at address 0x80 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x81 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x82 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x83 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x84 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x85 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x86 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x87 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x88 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x89 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8a has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8b has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8c has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8d has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8e has 16 sectors [Host vendor specific log]
GP/S Log at address 0x8f has 16 sectors [Host vendor specific log]
GP/S Log at address 0x90 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x91 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x92 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x93 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x94 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x95 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x96 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x97 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x98 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x99 has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9a has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9b has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9c has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9d has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9e has 16 sectors [Host vendor specific log]
GP/S Log at address 0x9f has 16 sectors [Host vendor specific log]
GP/S Log at address 0xa0 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa1 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa2 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa3 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa4 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa5 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa6 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa7 has 16 sectors [Device vendor specific log]
GP/S Log at address 0xa8 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xa9 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xaa has 1 sectors [Device vendor specific log]
GP/S Log at address 0xab has 1 sectors [Device vendor specific log]
GP/S Log at address 0xac has 1 sectors [Device vendor specific log]
GP/S Log at address 0xad has 1 sectors [Device vendor specific log]
GP/S Log at address 0xae has 1 sectors [Device vendor specific log]
GP/S Log at address 0xaf has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb0 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb1 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb2 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb3 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb4 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb5 has 1 sectors [Device vendor specific log]
GP Log at address 0xb6 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xb7 has 1 sectors [Device vendor specific log]
GP/S Log at address 0xbd has 1 sectors [Device vendor specific log]
GP/S Log at address 0xc0 has 1 sectors [Device vendor specific log]
GP Log at address 0xc1 has 24 sectors [Device vendor specific log]
GP/S Log at address 0xe0 has 1 sectors [SCT Command/Status]
GP/S Log at address 0xe1 has 1 sectors [SCT Data Transfer]
SMART Extended Comprehensive Error Log Version: 1 (6 sectors)
Device Error Count: 209 (device log contains only the most recent 24 errors)
CR = Command Register
FEATR = Features Register
COUNT = Count (was: Sector Count) Register
LBA_48 = Upper bytes of LBA High/Mid/Low Registers ] ATA-8
LH = LBA High (was: Cylinder High) Register ] LBA
LM = LBA Mid (was: Cylinder Low) Register ] Register
LL = LBA Low (was: Sector Number) Register ]
DV = Device (was: Device/Head) Register
DC = Device Control Register
ER = Error register
ST = Status register
Powered_Up_Time is measured from power on, and printed as
DDd+hh:mm:SS.sss where DD=days, hh=hours, mm=minutes,
SS=sec, and sss=millisec. It "wraps" after 49.710 days.
Error 209 [16] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
When the command that caused the error occurred, the device was active or idle.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA = 0x13348f60 = 322211680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:05.460 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:05.460 SET FEATURES [Reserved for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:05.460 READ NATIVE MAX ADDRESS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:05.457 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:05.457 SET FEATURES [Set transfer mode]
Error 208 [15] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:03.702 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:03.702 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:03.702 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:03.701 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:03.701 SET
FEATURES [Set tra
nsfer mode]
Error 207 [14] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:01.947 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:01.947 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:01.947 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:01.944 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:01.944 SET
FEATURES [Set tra
nsfer mode]
Error 206 [13] occurred at disk power-on lifetime: 39645 hours (1651
days + 21 h
ours)
When the command that caused the error occurred, the device was
active or idle
.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA =
0x13348f60 = 32221
1680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:11:00.189 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:11:00.189 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:11:00.189 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:11:00.188 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:11:00.188 SET
FEATURES [Set tra
nsfer mode]
Error 205 [12] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
When the command that caused the error occurred, the device was active or idle.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA = 0x13348f60 = 322211680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:58.434 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:58.434 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:58.434 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:58.431 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:10:58.431 SET
FEATURES [Set tra
nsfer mode]
Error 204 [11] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
When the command that caused the error occurred, the device was active or idle.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA = 0x13348f60 = 322211680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:56.681 READ FPDMA QUEUED
ea 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:56.660 FLUSH CACHE EXT
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:56.659 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:56.659 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:56.658 IDENTIFY DEVICE
Error 203 [10] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
When the command that caused the error occurred, the device was active or idle.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA = 0x13348f60 = 322211680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:54.903 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:54.903 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:54.903 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:54.901 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:10:54.901 SET
FEATURES [Set tra
nsfer mode]
Error 202 [9] occurred at disk power-on lifetime: 39645 hours (1651 days + 21 hours)
When the command that caused the error occurred, the device was active or idle.
After command completion occurred, registers were:
ER -- ST COUNT LBA_48 LH LM LL DV DC
-- -- -- == -- == == == -- -- -- -- --
40 -- 51 00 00 00 00 13 34 8f 60 40 00 Error: UNC at LBA = 0x13348f60 = 322211680
Commands leading to the command that caused the error were:
CR FEATR COUNT LBA_48 LH LM LL DV DC Powered_Up_Time Command/Feature_Name
-- == -- == -- == == == -- -- -- -- -- --------------- --------------------
60 00 08 00 00 00 00 13 34 8f 60 40 08 01:10:53.148 READ FPDMA QUEUED
ef 00 10 00 02 00 00 00 00 00 00 a0 08 01:10:53.147 SET
FEATURES [Reserve
d for Serial ATA]
27 00 00 00 00 00 00 00 00 00 00 e0 08 01:10:53.146 READ NATIVE
MAX ADDRE
SS EXT
ec 00 00 00 00 00 00 00 00 00 00 a0 08 01:10:53.145 IDENTIFY DEVICE
ef 00 03 00 46 00 00 00 00 00 00 a0 08 01:10:53.145 SET
FEATURES [Set tra
nsfer mode]
SMART Extended Self-test Log Version: 1 (1 sectors)
No self-tests have been logged. [To run self-tests, use: smartctl -t]
SMART Selective self-test log data structure revision number 1
SPAN MIN_LBA MAX_LBA CURRENT_TEST_STATUS
1 0 0 Not_testing
2 0 0 Not_testing
3 0 0 Not_testing
4 0 0 Not_testing
5 0 0 Not_testing
Selective self-test flags (0x0):
After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.
SCT Status Version: 3
SCT Version (vendor specific): 258 (0x0102)
SCT Support Level: 1
Device State: Active (0)
Current Temperature: 39 Celsius
Power Cycle Min/Max Temperature: 30/39 Celsius
Lifetime Min/Max Temperature: 0/39 Celsius
Under/Over Temperature Limit Count: 0/0
SCT Temperature History Version: 2
Temperature Sampling Period: 1 minute
Temperature Logging Interval: 1 minute
Min/Max recommended Temperature: 0/60 Celsius
Min/Max Temperature Limit: -41/85 Celsius
Temperature History Size (Index): 478 (368)
Index Estimated Time Temperature Celsius
369 2017-04-12 01:36 29 **********
... ..(198 skipped). .. **********
90 2017-04-12 04:55 29 **********
91 2017-04-12 04:56 28 *********
92 2017-04-12 04:57 29 **********
... ..( 71 skipped). .. **********
164 2017-04-12 06:09 29 **********
165 2017-04-12 06:10 ? -
166 2017-04-12 06:11 30 ***********
167 2017-04-12 06:12 30 ***********
168 2017-04-12 06:13 30 ***********
169 2017-04-12 06:14 31 ************
170 2017-04-12 06:15 32 *************
... ..( 3 skipped). .. *************
174 2017-04-12 06:19 32 *************
175 2017-04-12 06:20 33 **************
176 2017-04-12 06:21 33 **************
177 2017-04-12 06:22 33 **************
178 2017-04-12 06:23 34 ***************
179 2017-04-12 06:24 34 ***************
180 2017-04-12 06:25 35 ****************
... ..( 8 skipped). .. ****************
189 2017-04-12 06:34 35 ****************
190 2017-04-12 06:35 36 *****************
... ..( 23 skipped). .. *****************
214 2017-04-12 06:59 36 *****************
215 2017-04-12 07:00 37 ******************
... ..( 4 skipped). .. ******************
220 2017-04-12 07:05 37 ******************
221 2017-04-12 07:06 38 *******************
222 2017-04-12 07:07 37 ******************
223 2017-04-12 07:08 38 *******************
... ..( 6 skipped). .. *******************
230 2017-04-12 07:15 38 *******************
231 2017-04-12 07:16 37 ******************
232 2017-04-12 07:17 38 *******************
... ..( 14 skipped). .. *******************
247 2017-04-12 07:32 38 *******************
248 2017-04-12 07:33 39 ********************
249 2017-04-12 07:34 39 ********************
250 2017-04-12 07:35 38 *******************
251 2017-04-12 07:36 39 ********************
... ..( 4 skipped). .. ********************
256 2017-04-12 07:41 39 ********************
257 2017-04-12 07:42 29 **********
... ..(110 skipped). .. **********
368 2017-04-12 09:33 29 **********
SCT Error Recovery Control command not supported
Device Statistics (GP Log 0x04) not supported
SATA Phy Event Counters (GP Log 0x11)
ID Size Value Description
0x0001 2 0 Command failed due to ICRC error
0x0002 2 0 R_ERR response for data FIS
0x0003 2 0 R_ERR response for device-to-host data FIS
0x0004 2 0 R_ERR response for host-to-device data FIS
0x0005 2 0 R_ERR response for non-data FIS
0x0006 2 0 R_ERR response for device-to-host non-data FIS
0x0007 2 0 R_ERR response for host-to-device non-data FIS
0x000a 2 7 Device-to-host register FISes sent due to a COMRESET
0x000b 2 0 CRC errors within host-to-device FIS
0x8000 4 5831 Vendor specific
mail:~ # mdadm --examine /dev/sda
/dev/sda:
Magic : Intel Raid ISM Cfg Sig.
Version : 1.1.00
Orig Family : 80d98105
Family : 68a98654
Generation : 00b83763
Attributes : All supported
UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
Checksum : 7e0e85bb correct
MPB Sectors : 2
Disks : 3
RAID Devices : 1
[Volume0]:
UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
RAID Level : 1
Members : 2
Slots : [__]
Failed disk : 1
This Slot : ?
Array Size : 976766976 (465.76 GiB 500.10 GB)
Per Dev Size : 976767240 (465.76 GiB 500.10 GB)
Sector Offset : 0
Num Stripes : 3815496
Chunk Size : 64 KiB
Reserved : 0
Migrate State : idle
Map State : failed
Dirty State : dirty
Disk00 Serial : WD-WMAYUL169523
State : active failed
Id : 00040000
Usable Size : 976766862 (465.76 GiB 500.10 GB)
Disk01 Serial : WD-WCC6Y1VENZK4
State : active failed
Id : 00050000
Usable Size : 976766862 (465.76 GiB 500.10 GB)
Disk02 Serial : Z4Z6V3CV:0
State : active failed
Id : ffffffff
Usable Size : 3907022862 (1863.01 GiB 2000.40 GB)
Disk Serial : WD-WMAYUL169523
State : active failed
Id : 00040000
Usable Size : 976766862 (465.76 GiB 500.10 GB)
mail:~ # mdadm --detail /dev/sda
mdadm: /dev/sda does not appear to be an md device
mail:~ # mdadm --detail /dev/md126
md126 md126p1 md126p2
mail:~ # mdadm --detail /dev/md126
md126 md126p1 md126p2
mail:~ # mdadm --detail /dev/md126
/dev/md126:
Container : /dev/md127, member 0
Raid Level : raid1
Array Size : 488383488 (465.76 GiB 500.10 GB)
Used Dev Size : 488383620 (465.76 GiB 500.10 GB)
Raid Devices : 2
Total Devices : 1
State : clean, degraded
Active Devices : 1
Working Devices : 1
Failed Devices : 0
Spare Devices : 0
UUID : 44c0fda9:b2d38c01:e48120f6:4bed6635
Number Major Minor RaidDevice State
1 8 0 0 active sync /dev/sda
1 0 0 1 removed
mail:~ # mdadm --detail /dev/md127
/dev/md127:
Version : imsm
Raid Level : container
Total Devices : 2
Working Devices : 2
UUID : 81a6fcf3:48d205e9:aa868e3f:9ad94fa5
Member Arrays : /dev/md126
Number Major Minor RaidDevice
0 8 16 - /dev/sdb
1 8 0 - /dev/sda
mail:~/lsdrv # ./lsdrv
PCI [ahci] 00:1f.2 RAID bus controller: Intel Corporation 82801 SATA
RAID Controller (rev 05)
├scsi 0:0:0:0 ATAPI iHAS424 B {3524253_2N8147500192}
│└sr0 1.00g [11:0] Empty/Unknown
├scsi 1:x:x:x [Empty]
├scsi 2:x:x:x [Empty]
├scsi 3:x:x:x [Empty]
├scsi 4:0:0:0 ATA WDC WD5002AALX-0 {WD-WMAYUL169523}
│└sda 465.76g [8:0] isw_raid_member
│ ├md126 465.76g [9:126] MD vexternal:/md127/0 raid1 (2) active
DEGRADED, 64k Chunk, recover (none) none
{44c0fda9:b2d38c01:e48120f6:4bed6635}
│ ││ Partitioned (dos)
│ │├md126p1 4.01g [259:0] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ │└md126p2 461.75g [259:1] Partitioned (dos)
{bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
│ │ └Mounted as /dev/md126p2 @ /
│ ├md127 0.00k [9:127] MD vexternal:imsm () inactive, None (None)
None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
│ │ Empty/Unknown
│ ├sda1 4.01g [8:1] swap {57b97914-1b5f-4ac9-b7ca-c0e866535f68}
│ └sda2 461.75g [8:2] Partitioned (dos) {bc3d52aa-a6d5-49a5-ab72-333b8dd5bc6d}
└scsi 5:0:0:0 ATA WDC WD5003AZEX-0 {WD-WCC6Y1VENZK4}
└sdb 465.76g [8:16] isw_raid_member
└md127 0.00k [9:127] MD vexternal:imsm () inactive, None (None)
None {81a6fcf3:48d205e9:aa868e3f:9ad94fa5}
Empty/Unknown
PCI [sata_sil24] 04:00.0 RAID bus controller: Silicon Image, Inc. SiI
3124 PCI-X Serial ATA Controller (rev 02)
├scsi 6:x:x:x [Empty]
├scsi 7:x:x:x [Empty]
├scsi 8:x:x:x [Empty]
└scsi 9:x:x:x [Empty]
mail:~/lsdrv # cat /proc/mdstat
Personalities : [raid1] [raid0] [raid10] [raid6] [raid5] [raid4]
md126 : active raid1 sda[1]
488383488 blocks super external:/md127/0 [2/1] [U_]
md127 : inactive sda[1](S) sdb[0](S)
5928 blocks super external:imsm
unused devices: <none>
^ permalink raw reply
* Re: Linux software raid troubles
From: Reindl Harald @ 2017-04-12 14:45 UTC (permalink / raw)
To: linuxknight, linux-raid
In-Reply-To: <CAAO=44Y=8xrnWvMp214RFq9Y-KVDQVfEYa2vL-Ahgmbvgs6Y4w@mail.gmail.com>
Am 12.04.2017 um 16:31 schrieb linuxknight:
> Last weekend I was moving a server with a raid1 configuration,
> controlled by a Intel Corporation 82801 SATA RAID Controller. Upon
> reboot I noticed the degraded message (server hadnt been rebooted in a
> couple years).
>
> The raid1 array was two 500gb black WD drives. I wasnt able to locate
> an identical 500gb disk, but did find a 2TB just to get things
> mirrored again. The bios screen accepted the replacement disk and
> said it would rebuild in the OS. mdsync seemed to do its thing but I
> noticed mdmon process was taking 200% cpu. I let it go a few days
> thinking it was just taking longer than normal to sync, then rebooted.
> It was in a complete failed state and wouldnt boot at all. After
> removing the 2TB disk I was able to boot into the OS again. I just
> assumed I needed a similar drive size for the second part of the
> mirror.
when you talk about a "SATA RAID Controller" and "The bios screen
accepted the replacement disk and said it would rebuild in the OS" this
sadly is not a "linux software raid" on its own
197 Current_Pending_Sector -O--CK 200 200 000 - 9
198 Offline_Uncorrectable ----CK 200 200 000 - 7
i would strongly suggest https://www.gnu.org/software/ddrescue/ and make
an image of that disk because after 39646 Power_On_Hours it's likely that
the remaining disk fails completely in a short time and you could at
least restore the disk-image with "dd" to a new disk if that happens as
well as mount it as a loop-device
^ permalink raw reply
* Re: [md PATCH 1/2] md: allow creation of mdNNN arrays via md_mod/parameters/new_array
From: Coly Li @ 2017-04-12 14:48 UTC (permalink / raw)
To: NeilBrown, Shaohua Li; +Cc: linux-raid
In-Reply-To: <149197837299.19936.14922734851405940379.stgit@noble>
On 2017/4/12 下午2:26, NeilBrown wrote:
> The intention when creating the "new_array" parameter and the
> possibility of having array names like "md_HOME" was to transition
> away from the old way of creating arrays and to eventually only use
> this new way.
>
> The "old" way of creating array is to create a device node in /dev
> and then open it. The act of opening creates the array.
> This is problematic because sometimes the device node can be opened
> when we don't want to create an array. This can easily happen
> when some rule triggered by udev looks at a device as it is being
> destroyed. The node in /dev continues to exist for a short period
> after an array is stopped, and opening it during this time recreates
> the array (as an inactive array).
>
> Unfortunately no clear plan for the transition was created. It is now
> time to fix that.
>
> This patch allows devices with numeric names, like "md999" to be
> created by writing to "new_array". This will only work if the minor
> number given is not already in use. This will allow mdadm to
> support the creation of arrays with numbers > 511 (currently not
> possible) by writing to new_array.
> mdadm can, at some point, use this approach to create *all* arrays,
> which will allow the transition to only using the new-way.
>
> Signed-off-by: NeilBrown <neilb@suse.com>
Acked-by: Coly Li <colyli@suse.de>
> ---
> drivers/md/md.c | 34 ++++++++++++++++++++++++++++------
> 1 file changed, 28 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 9fe930109012..c3d3bae947a1 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -5164,6 +5164,14 @@ static void no_op(struct percpu_ref *r) {}
>
> static int md_alloc(dev_t dev, char *name)
> {
> + /* If dev is zero, name is the name of a device to allocate with
> + * an arbitrary minor number. It will be "md_???"
> + * If dev is non-zero it must be a device number with a MAJOR of
> + * MD_MAJOR or mdp_major. In this case, if "name" is NULL, then
> + * the device is being created by opening a node in /dev.
> + * If "name" is not NULL, the device is being created by
> + * writing to /sys/module/md_mod/parameters/new_array.
> + */
> static DEFINE_MUTEX(disks_mutex);
> struct mddev *mddev = mddev_find(dev);
> struct gendisk *disk;
> @@ -5189,7 +5197,7 @@ static int md_alloc(dev_t dev, char *name)
> if (mddev->gendisk)
> goto abort;
>
> - if (name) {
> + if (name && !dev) {
> /* Need to ensure that 'name' is not a duplicate.
> */
> struct mddev *mddev2;
> @@ -5203,6 +5211,11 @@ static int md_alloc(dev_t dev, char *name)
> }
> spin_unlock(&all_mddevs_lock);
> }
> + if (name && dev)
> + /*
> + * Creating /dev/mdNNN via "newarray", so adjust hold_active.
> + */
> + mddev->hold_active = UNTIL_STOP;
>
> error = -ENOMEM;
> mddev->queue = blk_alloc_queue(GFP_KERNEL);
> @@ -5279,21 +5292,30 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
>
> static int add_named_array(const char *val, struct kernel_param *kp)
> {
> - /* val must be "md_*" where * is not all digits.
> - * We allocate an array with a large free minor number, and
> + /* val must be "md_*" or "mdNNN".
> + * For "md_*" we allocate an array with a large free minor number, and
> * set the name to val. val must not already be an active name.
> + * For "mdNNN" we allocate an array with the minor number NNN
> + * which must not already be in use.
> */
> int len = strlen(val);
> char buf[DISK_NAME_LEN];
> + unsigned long devnum;
>
> while (len && val[len-1] == '\n')
> len--;
> if (len >= DISK_NAME_LEN)
> return -E2BIG;
> strlcpy(buf, val, len+1);
> - if (strncmp(buf, "md_", 3) != 0)
> - return -EINVAL;
> - return md_alloc(0, buf);
> + if (strncmp(buf, "md_", 3) == 0)
> + return md_alloc(0, buf);
> + if (strncmp(buf, "md", 2) == 0 &&
> + isdigit(buf[2]) &&
> + kstrtoul(buf+2, 10, &devnum) == 0 &&
> + devnum <= MINORMASK)
> + return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
> +
> + return -EINVAL;
> }
>
> static void md_safemode_timeout(unsigned long data)
>
>
^ permalink raw reply
* Re: [md PATCH 2/2] md: support disabling of create-on-open semantics.
From: Coly Li @ 2017-04-12 14:49 UTC (permalink / raw)
To: NeilBrown, Shaohua Li; +Cc: linux-raid
In-Reply-To: <149197837322.19936.7035050500466184535.stgit@noble>
On 2017/4/12 下午2:26, NeilBrown wrote:
> md allows a new array device to be created by simply
> opening a device file. This makes it difficult to
> remove the device and udev is likely to open the device file
> as part of processing the REMOVE event.
>
> There is an alternate mechanism for creating arrays
> by writing to the new_array module parameter.
> When using tools that work with this parameter, it is
> best to disable the old semantics.
> This new module parameter allows that.
>
> Signed-off-by: NeilBrown <neilb@suse.com>
Acked-by: Coly Li <colyli@suse.de>
> ---
> drivers/md/md.c | 14 +++++++++++++-
> 1 file changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index c3d3bae947a1..a7ab769eacc3 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -174,6 +174,16 @@ static const struct block_device_operations md_fops;
>
> static int start_readonly;
>
> +/*
> + * The original mechanism for creating an md device is to create
> + * a device node in /dev and to open it. This causes races with device-close.
> + * The preferred method is to write to the "new_array" module parameter.
> + * This can avoid races.
> + * Setting create_on_open to false disables the original mechanism
> + * so all the races disappear.
> + */
> +static bool create_on_open = true;
> +
> /* bio_clone_mddev
> * like bio_clone_bioset, but with a local bio set
> */
> @@ -5286,7 +5296,8 @@ static int md_alloc(dev_t dev, char *name)
>
> static struct kobject *md_probe(dev_t dev, int *part, void *data)
> {
> - md_alloc(dev, NULL);
> + if (create_on_open)
> + md_alloc(dev, NULL);
> return NULL;
> }
>
> @@ -9202,6 +9213,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
> module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
> module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
> module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
> +module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
>
> MODULE_LICENSE("GPL");
> MODULE_DESCRIPTION("MD RAID framework");
>
>
^ permalink raw reply
* Re: Linux software raid troubles
From: Reindl Harald @ 2017-04-12 15:29 UTC (permalink / raw)
To: linuxknight, linux-raid
In-Reply-To: <CAAO=44bsG1yrmYbag1eruNA_tdXxqiJptnRyPB=4TW6r5771HQ@mail.gmail.com>
please no private-only responses
Am 12.04.2017 um 16:52 schrieb linuxknight:
> Thank you for the reply. I was just examining the hardware in my
> server and it looks like there is an LSI card in there. If I create a
> new Hardware raid mirror in that controller, is it possible to use the
> ddrescue to get my current OS onto that mirror and boot from it? Im
> unfamiliar with the ddrescue but will certainly read up more.
"ddrescue" is at the end of the day the same as "dd"
it reads the whole drive block-by-block and writes it to an image file,
later you can do "dd if=image.img of=/dev/sdX bs=1M" and you get a 100%
identical state of the disk
so just pull out that drive, connect it to an ordinary SATA adapter, take
the image and be happy that you have a backup, if the RAID-controller
has stored whatever metadata at the beginning of the drive it's also part
of the image
and hence leave out that controller to get a 100% block-by-block copy of
the whole drive
> On Wed, Apr 12, 2017 at 10:45 AM, Reindl Harald <h.reindl@thelounge.net> wrote:
>>
>>
>> Am 12.04.2017 um 16:31 schrieb linuxknight:
>>>
>>> Last weekend I was moving a server with a raid1 configuration,
>>> controlled by a Intel Corporation 82801 SATA RAID Controller. Upon
>>> reboot I noticed the degraded message (server hadnt been rebooted in a
>>> couple years).
>>>
>>> The raid1 array was two 500gb black WD drives. I wasnt able to locate
>>> an identical 500gb disk, but did find a 2TB just to get things
>>> mirrored again. The bios screen accepted the replacement disk and
>>> said it would rebuild in the OS. mdsync seemed to do its thing but I
>>> noticed mdmon process was taking 200% cpu. I let it go a few days
>>> thinking it was just taking longer than normal to sync, then rebooted.
>>> It was in a complete failed state and wouldnt boot at all. After
>>> removing the 2TB disk I was able to boot into the OS again. I just
>>> assumed I needed a similar drive size for the second part of the
>>> mirror.
>>
>>
>> when you talk about a "SATA RAID Controller" and "The bios screen accepted
>> the replacement disk and said it would rebuild in the OS" this sadly is not
>> a "linux software raid" at it's own
>>
>> 197 Current_Pending_Sector -O--CK 200 200 000 - 9
>> 198 Offline_Uncorrectable ----CK 200 200 000 - 7
>>
>> i would strongly suggest https://www.gnu.org/software/ddrescue/ and make a
>> image of that disk because after 39646 Power_On_Hours it's likely that the
>> remaining disk fails completly in a short time and you could at least
>> restore the disk-image with "dd" to a new disk if that happens as well as
>> mount it with as loop-device
^ permalink raw reply
* Re: Linux software raid troubles
From: linuxknight @ 2017-04-12 15:36 UTC (permalink / raw)
To: Reindl Harald; +Cc: linux-raid
In-Reply-To: <cbadbdeb-7e6b-7784-e49c-dd2801903bb6@thelounge.net>
Thank you Reindl, Using your method would I be able to apply this IMG
file to a fresh raid1 mirror and still have it be bootable?
The reason I ask is I was looking at this guide,
https://www.data-medics.com/forum/how-to-clone-a-hard-drive-with-bad-sectors-using-ddrescue-t133.html
It has a method to transfer drive to drive. I was thinking I would
create the fresh RAID mirror on the dedicated LSI card, then ddrescue
possibly bad drive to the new raid mirror. Is this a bad idea?
On Wed, Apr 12, 2017 at 11:29 AM, Reindl Harald <h.reindl@thelounge.net> wrote:
> please no private-only respones
>
> Am 12.04.2017 um 16:52 schrieb linuxknight:
>>
>> Thanks you for the reply. I was just examining the hardward in my
>> server and it looks like there is an LSI card in there. If I create a
>> new Hardware raid mirror in that controller, is it possible to use the
>> ddrescue to get my current OS onto that mirror and boot from it? Im
>> unfamiliar with the ddrescue but will certainly read up more.
>
>
> "ddrescue" is at the end of the day the same as "dd"
>
> it reads the whole drive block-by-block and writes it to a image file, later
> you can do "dd if=image.mig of=/dev/sdX bs=1M" and you get a 100% identical
> state of the disk
>
> so just put out that drive, connect it to a ordinary SATA adapter, take the
> image and be happy that you have a backup, if the RAID-controller has stored
> whatever metadata on begin of the drive it's also part of the image
>
> and hence leave out that controller to get a 100% block-by-block copy of the
> whole drive
>
>
>> On Wed, Apr 12, 2017 at 10:45 AM, Reindl Harald <h.reindl@thelounge.net>
>> wrote:
>>>
>>>
>>>
>>> Am 12.04.2017 um 16:31 schrieb linuxknight:
>>>>
>>>>
>>>> Last weekend I was moving a server with a raid1 configuration,
>>>> controlled by a Intel Corporation 82801 SATA RAID Controller. Upon
>>>> reboot I noticed the degraded message (server hadnt been rebooted in a
>>>> couple years).
>>>>
>>>> The raid1 array was two 500gb black WD drives. I wasnt able to locate
>>>> an identical 500gb disk, but did find a 2TB just to get things
>>>> mirrored again. The bios screen accepted the replacement disk and
>>>> said it would rebuild in the OS. mdsync seemed to do its thing but I
>>>> noticed mdmon process was taking 200% cpu. I let it go a few days
>>>> thinking it was just taking longer than normal to sync, then rebooted.
>>>> It was in a complete failed state and wouldnt boot at all. After
>>>> removing the 2TB disk I was able to boot into the OS again. I just
>>>> assumed I needed a similar drive size for the second part of the
>>>> mirror.
>>>
>>>
>>>
>>> when you talk about a "SATA RAID Controller" and "The bios screen
>>> accepted
>>> the replacement disk and said it would rebuild in the OS" this sadly is
>>> not
>>> a "linux software raid" at it's own
>>>
>>> 197 Current_Pending_Sector -O--CK 200 200 000 - 9
>>> 198 Offline_Uncorrectable ----CK 200 200 000 - 7
>>>
>>> i would strongly suggest https://www.gnu.org/software/ddrescue/ and make
>>> a
>>> image of that disk because after 39646 Power_On_Hours it's likely that
>>> the
>>> remaining disk fails completly in a short time and you could at least
>>> restore the disk-image with "dd" to a new disk if that happens as well as
>>> mount it with as loop-device
>
>
^ permalink raw reply
* Re: Linux software raid troubles
From: Reindl Harald @ 2017-04-12 16:11 UTC (permalink / raw)
To: linuxknight; +Cc: linux-raid
In-Reply-To: <CAAO=44Y9YspCqyfvDgx30mGO_VhYZpuj9D8U1O6063B8VL61vw@mail.gmail.com>
Am 12.04.2017 um 17:36 schrieb linuxknight:
> Thank you Reindl, Using your method would I be able to apply this IMG
> file to a fresh raid1 mirror and still have it be bootable?
that's the whole point - there is no difference if you have another
physical disk or an image-file as destination - thanks to linux everything
is a file
whenever you play around with disks which might fail or are already
broken take a complete image as soon as possible because before you try
to restore something from that image you can even copy that one, try to
mount it, play around and whenever you are unsure if you damaged its
state just make a fresh copy from the untouched first backup
> The reason I ask is I was looking at this guide,
> https://www.data-medics.com/forum/how-to-clone-a-hard-drive-with-bad-sectors-using-ddrescue-t133.html
> It has a method to transfer drive to drive. I was thinking I would
> create the fresh RAID mirror on the dedicated LSI card, then ddrescue
> possibly bad drive to the new raid mirror. Is this a bad idea?
>
> On Wed, Apr 12, 2017 at 11:29 AM, Reindl Harald <h.reindl@thelounge.net> wrote:
>> please no private-only respones
>>
>> Am 12.04.2017 um 16:52 schrieb linuxknight:
>>>
>>> Thanks you for the reply. I was just examining the hardward in my
>>> server and it looks like there is an LSI card in there. If I create a
>>> new Hardware raid mirror in that controller, is it possible to use the
>>> ddrescue to get my current OS onto that mirror and boot from it? Im
>>> unfamiliar with the ddrescue but will certainly read up more.
>>
>>
>> "ddrescue" is at the end of the day the same as "dd"
>>
>> it reads the whole drive block-by-block and writes it to a image file, later
>> you can do "dd if=image.mig of=/dev/sdX bs=1M" and you get a 100% identical
>> state of the disk
>>
>> so just put out that drive, connect it to a ordinary SATA adapter, take the
>> image and be happy that you have a backup, if the RAID-controller has stored
>> whatever metadata on begin of the drive it's also part of the image
>>
>> and hence leave out that controller to get a 100% block-by-block copy of the
>> whole drive
>>
>>
>>> On Wed, Apr 12, 2017 at 10:45 AM, Reindl Harald <h.reindl@thelounge.net>
>>> wrote:
>>>>
>>>>
>>>>
>>>> Am 12.04.2017 um 16:31 schrieb linuxknight:
>>>>>
>>>>>
>>>>> Last weekend I was moving a server with a raid1 configuration,
>>>>> controlled by a Intel Corporation 82801 SATA RAID Controller. Upon
>>>>> reboot I noticed the degraded message (server hadnt been rebooted in a
>>>>> couple years).
>>>>>
>>>>> The raid1 array was two 500gb black WD drives. I wasnt able to locate
>>>>> an identical 500gb disk, but did find a 2TB just to get things
>>>>> mirrored again. The bios screen accepted the replacement disk and
>>>>> said it would rebuild in the OS. mdsync seemed to do its thing but I
>>>>> noticed mdmon process was taking 200% cpu. I let it go a few days
>>>>> thinking it was just taking longer than normal to sync, then rebooted.
>>>>> It was in a complete failed state and wouldnt boot at all. After
>>>>> removing the 2TB disk I was able to boot into the OS again. I just
>>>>> assumed I needed a similar drive size for the second part of the
>>>>> mirror.
>>>>
>>>>
>>>>
>>>> when you talk about a "SATA RAID Controller" and "The bios screen
>>>> accepted
>>>> the replacement disk and said it would rebuild in the OS" this sadly is
>>>> not
>>>> a "linux software raid" at it's own
>>>>
>>>> 197 Current_Pending_Sector -O--CK 200 200 000 - 9
>>>> 198 Offline_Uncorrectable ----CK 200 200 000 - 7
>>>>
>>>> i would strongly suggest https://www.gnu.org/software/ddrescue/ and make
>>>> a
>>>> image of that disk because after 39646 Power_On_Hours it's likely that
>>>> the
>>>> remaining disk fails completly in a short time and you could at least
>>>> restore the disk-image with "dd" to a new disk if that happens as well as
>>>> mount it with as loop-device
^ permalink raw reply
* Re: [PATCH v1 1/2] mdadm/manpage:update manpage for readonly parameter
From: Jes Sorensen @ 2017-04-12 17:51 UTC (permalink / raw)
To: Zhilong Liu; +Cc: linux-raid
In-Reply-To: <1491986198-16642-1-git-send-email-zlliu@suse.com>
On 04/12/2017 04:36 AM, Zhilong Liu wrote:
> update readonly in manpage:
> Currently both the readwrite and readonly are worked well,
> update the readonly section.
> One commit in linux/driver/md. Cleared "MD_CLOSING bit" to
> Fixes: af8d8e6f0315 ("md: changes for MD_STILL_CLOSED flag")
>
> Signed-off-by: Zhilong Liu <zlliu@suse.com>
> ---
> mdadm.8.in | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
Applied!
Thanks,
Jes
> diff --git a/mdadm.8.in b/mdadm.8.in
> index 744c12b..f10a8b8 100644
> --- a/mdadm.8.in
> +++ b/mdadm.8.in
> @@ -925,7 +925,8 @@ will not try to be so clever.
> Start the array
> .B read only
> rather than read-write as normal. No writes will be allowed to the
> -array, and no resync, recovery, or reshape will be started.
> +array, and no resync, recovery, or reshape will be started. It works with
> +Create, Assemble, Manage and Misc mode.
>
> .TP
> .BR \-a ", " "\-\-auto{=yes,md,mdp,part,p}{NN}"
> @@ -2232,7 +2233,7 @@ be in use.
>
> .TP
> .B \-\-readonly
> -start the array readonly \(em not supported yet.
> +start the array in readonly mode.
>
> .SH MANAGE MODE
> .HP 12
>
^ permalink raw reply
* Re: [PATCH v1 2/2] mdadm/manpage:clustered arrays don't support array-size yet
From: Jes Sorensen @ 2017-04-12 17:57 UTC (permalink / raw)
To: Zhilong Liu; +Cc: linux-raid
In-Reply-To: <1491986247-16706-1-git-send-email-zlliu@suse.com>
On 04/12/2017 04:37 AM, Zhilong Liu wrote:
> Update manpage for array-size section:
> Clustered arrays don't support the --array-size yet.
>
> Signed-off-by: Zhilong Liu <zlliu@suse.com>
> ---
> mdadm.8.in | 2 ++
> 1 file changed, 2 insertions(+)
Applied!
Thanks,
Jes
^ permalink raw reply
* Re: [md PATCH 0/2] Make it possible to disable create_on_open semantics.
From: Shaohua Li @ 2017-04-12 19:24 UTC (permalink / raw)
To: NeilBrown; +Cc: linux-raid, Coly Li
In-Reply-To: <149197804398.19936.12809382889200123725.stgit@noble>
On Wed, Apr 12, 2017 at 04:26:12PM +1000, Neil Brown wrote:
> Currently, opening an md /dev node will create the array object.
> This makes it hard to destroy the object as udev will typically
> re-open the device node when handling REMOVE events.
>
> The "new_array" module parameter was created to work towards avoiding
> this problem, and it can be used when
> CREATE names=yes
>
> is given in /etc/mdadm.conf.
> However, this doesn't currently support names like "md%d", which lots of
> people use and expect, so we need more work before we can transition
> away from create_on_open.
>
> These patches add support to "new_array" so that md%d devices
> can be created. This will make it, once again, possible to have
> md%d devices with numbers > 511. (3.17 made this impossible).
>
> An enhancement to mdadm that uses this will cause new_array to always
> be used (where available), and we can then disable create_on_open
> completely (after suitable transition periods).
Thanks, applied! The md device creation interface especially create_on_open is
a disaster, hopefully the future sysfs/configfs interface deprecates all of these.
Thanks,
Shaohua
^ permalink raw reply
* [md PATCH] md: handle read-only member devices better.
From: NeilBrown @ 2017-04-12 22:53 UTC (permalink / raw)
To: Shaohua Li; +Cc: Linux-RAID, Nanda Kishore Chinnaram
[-- Attachment #1: Type: text/plain, Size: 2813 bytes --]
1/ If an array has any read-only devices when it is started,
the array itself must be read-only
2/ A read-only device cannot be added to an array after it is
started.
3/ Setting an array to read-write should not succeed
if any member devices are read-only
Reported-and-Tested-by: Nanda Kishore Chinnaram <Nanda_Kishore_Chinna@dell.com>
Signed-off-by: NeilBrown <neilb@suse.com>
---
drivers/md/md.c | 41 ++++++++++++++++++++++++++---------------
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 22894303d335..9fe930109012 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2093,6 +2093,10 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
if (find_rdev(mddev, rdev->bdev->bd_dev))
return -EEXIST;
+ if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
+ mddev->pers)
+ return -EROFS;
+
/* make sure rdev->sectors exceeds mddev->dev_sectors */
if (!test_bit(Journal, &rdev->flags) &&
rdev->sectors &&
@@ -5345,6 +5349,13 @@ int md_run(struct mddev *mddev)
continue;
sync_blockdev(rdev->bdev);
invalidate_bdev(rdev->bdev);
+ if (mddev->ro != 1 &&
+ (bdev_read_only(rdev->bdev) ||
+ bdev_read_only(rdev->meta_bdev))) {
+ mddev->ro = 1;
+ if (mddev->gendisk)
+ set_disk_ro(mddev->gendisk, 1);
+ }
/* perform some consistency tests on the device.
* We don't want the data to overlap the metadata,
@@ -5569,6 +5580,9 @@ static int do_md_run(struct mddev *mddev)
static int restart_array(struct mddev *mddev)
{
struct gendisk *disk = mddev->gendisk;
+ struct md_rdev *rdev;
+ bool has_journal = false;
+ bool has_readonly = false;
/* Complain if it has no devices */
if (list_empty(&mddev->disks))
@@ -5577,24 +5591,21 @@ static int restart_array(struct mddev *mddev)
return -EINVAL;
if (!mddev->ro)
return -EBUSY;
- if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
- struct md_rdev *rdev;
- bool has_journal = false;
-
- rcu_read_lock();
- rdev_for_each_rcu(rdev, mddev) {
- if (test_bit(Journal, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags)) {
- has_journal = true;
- break;
- }
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev) {
+ if (test_bit(Journal, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags))
+ has_journal = true;
+ if (bdev_read_only(rdev->bdev))
+ has_readonly = true;
+ }
+ rcu_read_unlock();
+ if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
/* Don't restart rw with journal missing/faulty */
- if (!has_journal)
return -EINVAL;
- }
+ if (has_readonly)
+ return -EROFS;
mddev->safemode = 0;
mddev->ro = 0;
--
2.12.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]
^ permalink raw reply related
* [v4 1/2] lib/raid6: Build proper files on corresponding arch
From: Matt Brown @ 2017-04-12 23:15 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, linux-raid
Previously the raid6 test Makefile did not correctly build the files for
testing on PowerPC. This patch fixes the bug, so that all appropriate files
for PowerPC are built.
Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
Changelog
v2 - v4
- fixup whitespace
- change versioning to match other patch
---
lib/raid6/test/Makefile | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 9c333e9..b64a267 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
ifeq ($(HAS_ALTIVEC),yes)
- OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ CFLAGS += -I../../../arch/powerpc/include
+ CFLAGS += -DCONFIG_ALTIVEC
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
endif
endif
ifeq ($(ARCH),tilegx)
--
2.9.3
^ permalink raw reply related
* [v4 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
From: Matt Brown @ 2017-04-12 23:15 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, linux-raid
In-Reply-To: <20170412231546.15715-1-matthew.brown.dev@gmail.com>
The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.
The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.
Performance benchmarks:
raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s
raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s
Note: Fixed minor bug in pq.h regarding missing and mismatched ifdef
statements.
Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
---
include/linux/raid/pq.h | 4 ++
lib/raid6/Makefile | 27 ++++++++++++-
lib/raid6/algos.c | 4 ++
lib/raid6/altivec.uc | 3 ++
lib/raid6/test/Makefile | 14 ++++++-
lib/raid6/vpermxor.uc | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 154 insertions(+), 2 deletions(-)
create mode 100644 lib/raid6/vpermxor.uc
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
extern const struct raid6_calls raid6_avx512x4;
extern const struct raid6_calls raid6_tilegx8;
extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..db095a7 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
int8.o int16.o int32.o
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c: UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+ &raid6_vpermxor1,
+ &raid6_vpermxor2,
+ &raid6_vpermxor4,
+ &raid6_vpermxor8,
#endif
#if defined(CONFIG_TILEGX)
&raid6_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
+#endif /* __KERNEL__ */
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..9c333e9 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -97,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
altivec8.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+vpermxor1.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
int1.c: int.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=1 < int.uc > $@
@@ -122,7 +134,7 @@ tables.c: mktables
./mktables > tables.c
clean:
- rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+ rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
rm -f tilegx*.c
spotless: clean
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 0000000..31a324d
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor opcode to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and the vpermxor opcode.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+ 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+ 0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+ 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+ 0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+ void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+ unative_t wp$$, wq$$, wd$$;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for (d = 0; d < bytes; d += NSIZE*$#) {
+ wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+ for (z = z0-1; z>=0; z--) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ /* P syndrome */
+ wp$$ = vec_xor(wp$$, wd$$);
+
+ /*Q syndrome */
+ asm("vpermxor %0,%1,%2,%3":"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+ wq$$ = vec_xor(wq$$, wd$$);
+ }
+ *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ }
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+
+ raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+ /* Check if CPU has both altivec and the vpermxor instruction*/
+# ifdef __KERNEL__
+ return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+ return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+ raid6_vpermxor$#_gen_syndrome,
+ NULL,
+ raid6_have_altivec_vpermxor,
+ "vpermxor$#",
+ 0
+};
+#endif
--
2.9.3
^ permalink raw reply related
* Re: [md PATCH] md: handle read-only member devices better.
From: Shaohua Li @ 2017-04-13 5:47 UTC (permalink / raw)
To: NeilBrown; +Cc: Linux-RAID, Nanda Kishore Chinnaram
In-Reply-To: <87a87lutj7.fsf@notabene.neil.brown.name>
On Thu, Apr 13, 2017 at 08:53:48AM +1000, Neil Brown wrote:
>
> 1/ If an array has any read-only devices when it is started,
> the array itself must be read-only
> 2/ A read-only device cannot be added to an array after it is
> started.
> 3/ Setting an array to read-write should not succeed
> if any member devices are read-only
I didn't get these. We call md_import_device() first to open the underlying
disk. We always use FMODE_READ|FMODE_WRITE to open the disk. So if the disk is
ro, md_import_device() should fail and we don't add the disk to the array. Why
would we have such issues?
Thanks,
Shaohua
> Reported-and-Tested-by: Nanda Kishore Chinnaram <Nanda_Kishore_Chinna@dell.com>
> Signed-off-by: NeilBrown <neilb@suse.com>
> ---
> drivers/md/md.c | 41 ++++++++++++++++++++++++++---------------
> 1 file changed, 26 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 22894303d335..9fe930109012 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2093,6 +2093,10 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
> if (find_rdev(mddev, rdev->bdev->bd_dev))
> return -EEXIST;
>
> + if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
> + mddev->pers)
> + return -EROFS;
> +
> /* make sure rdev->sectors exceeds mddev->dev_sectors */
> if (!test_bit(Journal, &rdev->flags) &&
> rdev->sectors &&
> @@ -5345,6 +5349,13 @@ int md_run(struct mddev *mddev)
> continue;
> sync_blockdev(rdev->bdev);
> invalidate_bdev(rdev->bdev);
> + if (mddev->ro != 1 &&
> + (bdev_read_only(rdev->bdev) ||
> + bdev_read_only(rdev->meta_bdev))) {
> + mddev->ro = 1;
> + if (mddev->gendisk)
> + set_disk_ro(mddev->gendisk, 1);
> + }
>
> /* perform some consistency tests on the device.
> * We don't want the data to overlap the metadata,
> @@ -5569,6 +5580,9 @@ static int do_md_run(struct mddev *mddev)
> static int restart_array(struct mddev *mddev)
> {
> struct gendisk *disk = mddev->gendisk;
> + struct md_rdev *rdev;
> + bool has_journal = false;
> + bool has_readonly = false;
>
> /* Complain if it has no devices */
> if (list_empty(&mddev->disks))
> @@ -5577,24 +5591,21 @@ static int restart_array(struct mddev *mddev)
> return -EINVAL;
> if (!mddev->ro)
> return -EBUSY;
> - if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
> - struct md_rdev *rdev;
> - bool has_journal = false;
> -
> - rcu_read_lock();
> - rdev_for_each_rcu(rdev, mddev) {
> - if (test_bit(Journal, &rdev->flags) &&
> - !test_bit(Faulty, &rdev->flags)) {
> - has_journal = true;
> - break;
> - }
> - }
> - rcu_read_unlock();
>
> + rcu_read_lock();
> + rdev_for_each_rcu(rdev, mddev) {
> + if (test_bit(Journal, &rdev->flags) &&
> + !test_bit(Faulty, &rdev->flags))
> + has_journal = true;
> + if (bdev_read_only(rdev->bdev))
> + has_readonly = true;
> + }
> + rcu_read_unlock();
> + if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
> /* Don't restart rw with journal missing/faulty */
> - if (!has_journal)
> return -EINVAL;
> - }
> + if (has_readonly)
> + return -EROFS;
>
> mddev->safemode = 0;
> mddev->ro = 0;
> --
> 2.12.2
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox