From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
To: jes.sorensen@gmail.com
Cc: linux-raid@vger.kernel.org,
Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Subject: [PATCH v3 4/6] super1: PPL support
Date: Thu, 16 Mar 2017 22:09:46 +0100 [thread overview]
Message-ID: <20170316210948.21093-5-artur.paszkiewicz@intel.com> (raw)
In-Reply-To: <20170316210948.21093-1-artur.paszkiewicz@intel.com>
Enable creating and assembling raid5 arrays with PPL (Partial Parity Log)
for 1.x metadata. When creating, reserve enough space for the PPL, store
its size and location in the superblock, and set the MD_FEATURE_PPL bit.
Write an initial empty header in the PPL area on each device. The PPL is
stored in the metadata region reserved for the internal write-intent
bitmap, so don't allow using a bitmap and PPL together.
While at it, fix two endianness issues in write_empty_r5l_meta_block()
and write_init_super1().
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
Assemble.c | 3 ++
Create.c | 2 +
Grow.c | 15 +++++-
Incremental.c | 3 ++
mdadm.h | 1 +
super1.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
6 files changed, 155 insertions(+), 19 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 8e55b49f..c0984201 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -962,6 +962,9 @@ static int start_array(int mdfd,
c->readonly = 1;
}
+ if (content->consistency_policy == CONSISTENCY_POLICY_PPL)
+ clean = 1;
+
rv = set_array_info(mdfd, st, content);
if (rv && !err_ok) {
pr_err("failed to set array info for %s: %s\n",
diff --git a/Create.c b/Create.c
index 4080bf69..10e7d108 100644
--- a/Create.c
+++ b/Create.c
@@ -524,6 +524,8 @@ int Create(struct supertype *st, char *mddev,
if (!s->bitmap_file &&
s->level >= 1 &&
st->ss->add_internal_bitmap &&
+ (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ s->consistency_policy != CONSISTENCY_POLICY_PPL) &&
(s->write_behind || s->size > 100*1024*1024ULL)) {
if (c->verbose > 0)
pr_err("automatically enabling write-intent bitmap on large array\n");
diff --git a/Grow.c b/Grow.c
index 455c5f90..e4351d7f 100755
--- a/Grow.c
+++ b/Grow.c
@@ -290,6 +290,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
int major = BITMAP_MAJOR_HI;
int vers = md_get_version(fd);
unsigned long long bitmapsize, array_size;
+ struct mdinfo *mdi;
if (vers < 9003) {
major = BITMAP_MAJOR_HOSTENDIAN;
@@ -389,12 +390,23 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
free(st);
return 1;
}
+
+ mdi = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY);
+ if (mdi) {
+ if (mdi->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ pr_err("Cannot add bitmap to array with PPL\n");
+ free(mdi);
+ free(st);
+ return 1;
+ }
+ free(mdi);
+ }
+
if (strcmp(s->bitmap_file, "internal") == 0 ||
strcmp(s->bitmap_file, "clustered") == 0) {
int rv;
int d;
int offset_setable = 0;
- struct mdinfo *mdi;
if (st->ss->add_internal_bitmap == NULL) {
pr_err("Internal bitmaps not supported with %s metadata\n", st->ss->name);
return 1;
@@ -446,6 +458,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
sysfs_init(mdi, fd, NULL);
rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
mdi->bitmap_offset);
+ free(mdi);
} else {
if (strcmp(s->bitmap_file, "clustered") == 0)
array.state |= (1 << MD_SB_CLUSTERED);
diff --git a/Incremental.c b/Incremental.c
index 0f507bb3..81afc7ec 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -528,6 +528,9 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
journal_device_missing = (info.journal_device_required) && (info.journal_clean == 0);
+ if (info.consistency_policy == CONSISTENCY_POLICY_PPL)
+ info.array.state |= 1;
+
if (enough(info.array.level, info.array.raid_disks,
info.array.layout, info.array.state & 1,
avail) == 0) {
diff --git a/mdadm.h b/mdadm.h
index 10c20416..ab1b7fc6 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -302,6 +302,7 @@ struct mdinfo {
long bitmap_offset; /* 0 == none, 1 == a file */
unsigned int ppl_size;
unsigned long long ppl_sector;
+ int ppl_offset;
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
diff --git a/super1.c b/super1.c
index 672cdde6..76eeca11 100644
--- a/super1.c
+++ b/super1.c
@@ -48,10 +48,18 @@ struct mdp_superblock_1 {
__u32 chunksize; /* in 512byte sectors */
__u32 raid_disks;
- __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
- * NOTE: signed, so bitmap can be before superblock
- * only meaningful of feature_map[0] is set.
- */
+ union {
+ __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
+ * NOTE: signed, so bitmap can be before superblock
+ * only meaningful of feature_map[0] is set.
+ */
+
+ /* only meaningful when feature_map[MD_FEATURE_PPL] is set */
+ struct {
+ __s16 offset; /* sectors from start of superblock that ppl starts */
+ __u16 size; /* ppl size in sectors */
+ } ppl;
+ };
/* These are only valid with feature bit '4' */
__u32 new_level; /* new level we are reshaping to */
@@ -131,6 +139,7 @@ struct misc_dev_info {
#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
#define MD_FEATURE_BITMAP_VERSIONED 256 /* bitmap version number checked properly */
#define MD_FEATURE_JOURNAL 512 /* support write journal */
+#define MD_FEATURE_PPL 1024 /* support PPL */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -140,6 +149,7 @@ struct misc_dev_info {
|MD_FEATURE_NEW_OFFSET \
|MD_FEATURE_BITMAP_VERSIONED \
|MD_FEATURE_JOURNAL \
+ |MD_FEATURE_PPL \
)
#ifndef MDASSEMBLE
@@ -289,6 +299,11 @@ static int awrite(struct align_fd *afd, void *buf, int len)
return len;
}
+static inline unsigned int choose_ppl_space(int chunk)
+{
+ return (PPL_HEADER_SIZE >> 9) + (chunk > 128*2 ? chunk : 128*2);
+}
+
#ifndef MDASSEMBLE
static void examine_super1(struct supertype *st, char *homehost)
{
@@ -392,6 +407,10 @@ static void examine_super1(struct supertype *st, char *homehost)
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
printf("Internal Bitmap : %ld sectors from superblock\n",
(long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ printf(" PPL : %u sectors at offset %d sectors from superblock\n",
+ __le16_to_cpu(sb->ppl.size),
+ __le16_to_cpu(sb->ppl.offset));
}
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
printf(" Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
@@ -934,10 +953,16 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
if (__le32_to_cpu(bsb->nodes) > 1)
info->array.state |= (1 << MD_SB_CLUSTERED);
+ super_offset = __le64_to_cpu(sb->super_offset);
info->data_offset = __le64_to_cpu(sb->data_offset);
info->component_size = __le64_to_cpu(sb->size);
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET))
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
info->bitmap_offset = (int32_t)__le32_to_cpu(sb->bitmap_offset);
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_PPL)) {
+ info->ppl_offset = __le16_to_cpu(sb->ppl.offset);
+ info->ppl_size = __le16_to_cpu(sb->ppl.size);
+ info->ppl_sector = super_offset + info->ppl_offset;
+ }
info->disk.major = 0;
info->disk.minor = 0;
@@ -948,7 +973,6 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
else
role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]);
- super_offset = __le64_to_cpu(sb->super_offset);
if (info->array.level <= 0)
data_size = __le64_to_cpu(sb->data_size);
else
@@ -965,8 +989,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
end = bboffset;
}
- if (super_offset + info->bitmap_offset < end)
- end = super_offset + info->bitmap_offset;
+ if (super_offset + info->bitmap_offset + info->ppl_offset < end)
+ end = super_offset + info->bitmap_offset + info->ppl_offset;
if (info->data_offset + data_size < end)
info->space_after = end - data_size - info->data_offset;
@@ -982,6 +1006,11 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
bmend += size;
if (bmend > earliest)
earliest = bmend;
+ } else if (info->ppl_offset > 0) {
+ unsigned long long pplend = info->ppl_offset +
+ info->ppl_size;
+ if (pplend > earliest)
+ earliest = pplend;
}
if (sb->bblog_offset && sb->bblog_size) {
unsigned long long bbend = super_offset;
@@ -1075,8 +1104,20 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
}
info->array.working_disks = working;
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL))
+
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL)) {
info->journal_device_required = 1;
+ info->consistency_policy = CONSISTENCY_POLICY_JOURNAL;
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_PPL)) {
+ info->consistency_policy = CONSISTENCY_POLICY_PPL;
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
+ info->consistency_policy = CONSISTENCY_POLICY_BITMAP;
+ } else if (info->array.level <= 0) {
+ info->consistency_policy = CONSISTENCY_POLICY_NONE;
+ } else {
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
+
info->journal_clean = 0;
}
@@ -1239,6 +1280,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
bitmap_offset = (long)__le32_to_cpu(sb->bitmap_offset);
bm_sectors = calc_bitmap_size(bms, 4096) >> 9;
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ bitmap_offset = (long)__le16_to_cpu(sb->ppl.offset);
+ bm_sectors = (long)__le16_to_cpu(sb->ppl.size);
}
#endif
if (sb_offset < data_offset) {
@@ -1472,6 +1516,9 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
memset(sb->dev_roles, 0xff, MAX_SB_SIZE - sizeof(struct mdp_superblock_1));
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
+
return 1;
}
@@ -1643,10 +1690,49 @@ static unsigned long choose_bm_space(unsigned long devsize)
static void free_super1(struct supertype *st);
-#define META_BLOCK_SIZE 4096
+#ifndef MDASSEMBLE
+
__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
-#ifndef MDASSEMBLE
+static int write_init_ppl1(struct supertype *st, struct mdinfo *info, int fd)
+{
+ struct mdp_superblock_1 *sb = st->sb;
+ void *buf;
+ struct ppl_header *ppl_hdr;
+ int ret;
+
+ ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+ if (ret) {
+ pr_err("Failed to allocate PPL header buffer\n");
+ return ret;
+ }
+
+ memset(buf, 0, PPL_HEADER_SIZE);
+ ppl_hdr = buf;
+ memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
+ ppl_hdr->signature = __cpu_to_le32(~crc32c_le(~0, sb->set_uuid,
+ sizeof(sb->set_uuid)));
+ ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+ if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+ ret = errno;
+ perror("Failed to seek to PPL header location");
+ }
+
+ if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ ret = errno;
+ perror("Write PPL header failed");
+ }
+
+ if (!ret)
+ fsync(fd);
+
+ free(buf);
+ return ret;
+}
+
+#define META_BLOCK_SIZE 4096
+
static int write_empty_r5l_meta_block(struct supertype *st, int fd)
{
struct r5l_meta_block *mb;
@@ -1673,7 +1759,7 @@ static int write_empty_r5l_meta_block(struct supertype *st, int fd)
crc = crc32c_le(crc, (void *)mb, META_BLOCK_SIZE);
mb->checksum = crc;
- if (lseek64(fd, (sb->data_offset) * 512, 0) < 0LL) {
+ if (lseek64(fd, __le64_to_cpu(sb->data_offset) * 512, 0) < 0LL) {
pr_err("cannot seek to offset of the meta block\n");
goto fail_to_write;
}
@@ -1706,7 +1792,7 @@ static int write_init_super1(struct supertype *st)
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
- sb->feature_map |= MD_FEATURE_JOURNAL;
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
}
for (di = st->info; di; di = di->next) {
@@ -1781,6 +1867,21 @@ static int write_init_super1(struct supertype *st)
(((char *)sb) + MAX_SB_SIZE);
bm_space = calc_bitmap_size(bms, 4096) >> 9;
bm_offset = (long)__le32_to_cpu(sb->bitmap_offset);
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ bm_space = choose_ppl_space(__le32_to_cpu(sb->chunksize));
+ if (bm_space > UINT16_MAX)
+ bm_space = UINT16_MAX;
+ if (st->minor_version == 0) {
+ bm_offset = -bm_space - 8;
+ if (bm_offset < INT16_MIN) {
+ bm_offset = INT16_MIN;
+ bm_space = -bm_offset - 8;
+ }
+ } else {
+ bm_offset = 8;
+ }
+ sb->ppl.offset = __cpu_to_le16(bm_offset);
+ sb->ppl.size = __cpu_to_le16(bm_space);
} else {
bm_space = choose_bm_space(array_size);
bm_offset = 8;
@@ -1852,8 +1953,17 @@ static int write_init_super1(struct supertype *st)
goto error_out;
}
- if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
+ if (rv == 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
rv = st->ss->write_bitmap(st, di->fd, NodeNumUpdate);
+ } else if (rv == 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL)) {
+ struct mdinfo info;
+
+ st->ss->getinfo_super(st, &info, NULL);
+ rv = st->ss->write_init_ppl(st, &info, di->fd);
+ }
+
close(di->fd);
di->fd = -1;
if (rv)
@@ -2121,11 +2231,13 @@ static __u64 avail_size1(struct supertype *st, __u64 devsize,
return 0;
#ifndef MDASSEMBLE
- if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
+ if (__le32_to_cpu(super->feature_map) & MD_FEATURE_BITMAP_OFFSET) {
/* hot-add. allow for actual size of bitmap */
struct bitmap_super_s *bsb;
bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
bmspace = calc_bitmap_size(bsb, 4096) >> 9;
+ } else if (__le32_to_cpu(super->feature_map) & MD_FEATURE_PPL) {
+ bmspace = __le16_to_cpu(super->ppl.size);
}
#endif
/* Allow space for bad block log */
@@ -2528,8 +2640,9 @@ static int validate_geometry1(struct supertype *st, int level,
return 0;
}
- /* creating: allow suitable space for bitmap */
- bmspace = choose_bm_space(devsize);
+ /* creating: allow suitable space for bitmap or PPL */
+ bmspace = consistency_policy == CONSISTENCY_POLICY_PPL ?
+ choose_ppl_space((*chunk)*2) : choose_bm_space(devsize);
if (data_offset == INVALID_SECTORS)
data_offset = st->data_offset;
@@ -2564,7 +2677,7 @@ static int validate_geometry1(struct supertype *st, int level,
switch(st->minor_version) {
case 0: /* metadata at end. Round down and subtract space to reserve */
devsize = (devsize & ~(4ULL*2-1));
- /* space for metadata, bblog, bitmap */
+ /* space for metadata, bblog, bitmap/ppl */
devsize -= 8*2 + 8 + bmspace;
break;
case 1:
@@ -2640,6 +2753,7 @@ struct superswitch super1 = {
.add_to_super = add_to_super1,
.examine_badblocks = examine_badblocks_super1,
.copy_metadata = copy_metadata1,
+ .write_init_ppl = write_init_ppl1,
#endif
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,
--
2.11.0
next prev parent reply other threads:[~2017-03-16 21:09 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-16 21:09 [PATCH v3 0/6] mdadm support for Partial Parity Log Artur Paszkiewicz
2017-03-16 21:09 ` [PATCH v3 1/6] Generic support for --consistency-policy and PPL Artur Paszkiewicz
2017-03-16 21:09 ` [PATCH v3 2/6] Detail: show consistency policy Artur Paszkiewicz
2017-03-16 21:09 ` [PATCH v3 3/6] imsm: PPL support Artur Paszkiewicz
2017-03-17 20:11 ` jes.sorensen
2017-03-20 8:07 ` Artur Paszkiewicz
2017-03-28 18:21 ` jes.sorensen
2017-03-16 21:09 ` Artur Paszkiewicz [this message]
2017-03-16 21:09 ` [PATCH v3 5/6] Add 'ppl' and 'no-ppl' options for --update= Artur Paszkiewicz
2017-03-16 21:09 ` [PATCH v3 6/6] Grow: support consistency policy change Artur Paszkiewicz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170316210948.21093-5-artur.paszkiewicz@intel.com \
--to=artur.paszkiewicz@intel.com \
--cc=jes.sorensen@gmail.com \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).