* [PATCH 1/6] Add --dataalignmentoffset to pvcreate to shift start of aligned data area
2009-07-25 22:14 [PATCH 0/6 v7] LVM2 topology support Mike Snitzer
@ 2009-07-25 22:14 ` Mike Snitzer
2009-07-26 4:20 ` Mike Snitzer
2009-07-25 22:14 ` [PATCH 2/6] Add devices/data_alignment_offset_detection to lvm.conf Mike Snitzer
` (4 subsequent siblings)
5 siblings, 1 reply; 8+ messages in thread
From: Mike Snitzer @ 2009-07-25 22:14 UTC (permalink / raw)
To: lvm-devel
Implement pvcreate --dataalignmentoffset to shift the start of
the aligned data area.
Adds pe_align_offset to 'struct physical_volume'; it is initialized with
set_pe_align_offset(). After pe_start is established, pe_align_offset is
added to it.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
WHATS_NEW | 1 +
lib/format1/format1.c | 1 +
lib/format_pool/format_pool.c | 1 +
lib/format_text/archiver.c | 2 +-
lib/format_text/format-text.c | 17 ++++++++++++++++-
lib/metadata/metadata-exported.h | 2 ++
lib/metadata/metadata.c | 30 ++++++++++++++++++++++++++++--
lib/metadata/metadata.h | 3 +++
man/pvcreate.8.in | 20 +++++++++++++++++---
tools/args.h | 1 +
tools/commands.h | 7 ++++---
tools/pvcreate.c | 23 +++++++++++++++++++++--
tools/vgconvert.c | 2 +-
13 files changed, 97 insertions(+), 13 deletions(-)
Index: LVM2/WHATS_NEW
===================================================================
--- LVM2.orig/WHATS_NEW
+++ LVM2/WHATS_NEW
@@ -1,5 +1,6 @@
Version 2.02.50 -
================================
+ Add --dataalignmentoffset to pvcreate to shift start of aligned data area.
Add global/wait_for_locks to lvm.conf so blocking for locks can be disabled.
All LV locks are non-blocking so remove LCK_NONBLOCK from separate macros.
Fix race condition with vgcreate and vgextend on same device (2.02.49).
Index: LVM2/lib/format1/format1.c
===================================================================
--- LVM2.orig/lib/format1/format1.c
+++ LVM2/lib/format1/format1.c
@@ -296,6 +296,7 @@ static int _format1_pv_setup(const struc
uint64_t pe_start, uint32_t extent_count,
uint32_t extent_size,
unsigned long data_alignment __attribute((unused)),
+ unsigned long data_alignment_offset __attribute((unused)),
int pvmetadatacopies __attribute((unused)),
uint64_t pvmetadatasize __attribute((unused)), struct dm_list *mdas __attribute((unused)),
struct physical_volume *pv, struct volume_group *vg __attribute((unused)))
Index: LVM2/lib/format_pool/format_pool.c
===================================================================
--- LVM2.orig/lib/format_pool/format_pool.c
+++ LVM2/lib/format_pool/format_pool.c
@@ -192,6 +192,7 @@ static int _pool_pv_setup(const struct f
uint32_t extent_count __attribute((unused)),
uint32_t extent_size __attribute((unused)),
unsigned long data_alignment __attribute((unused)),
+ unsigned long data_alignment_offset __attribute((unused)),
int pvmetadatacopies __attribute((unused)),
uint64_t pvmetadatasize __attribute((unused)),
struct dm_list *mdas __attribute((unused)),
Index: LVM2/lib/format_text/archiver.c
===================================================================
--- LVM2.orig/lib/format_text/archiver.c
+++ LVM2/lib/format_text/archiver.c
@@ -320,7 +320,7 @@ int backup_restore_vg(struct cmd_context
return 0;
}
if (!vg->fid->fmt->ops->
- pv_setup(vg->fid->fmt, UINT64_C(0), 0, 0, 0, 0UL,
+ pv_setup(vg->fid->fmt, UINT64_C(0), 0, 0, 0, 0, 0UL,
UINT64_C(0), &vg->fid->metadata_areas, pv, vg)) {
log_error("Format-specific setup for %s failed",
pv_dev_name(pv));
Index: LVM2/lib/format_text/format-text.c
===================================================================
--- LVM2.orig/lib/format_text/format-text.c
+++ LVM2/lib/format_text/format-text.c
@@ -1382,8 +1382,16 @@ static int _text_pv_write(const struct f
adjustment = pv->pe_start % pv->pe_align;
if (adjustment)
pv->pe_start += pv->pe_align - adjustment;
+ if (pv->pe_align_offset)
+ pv->pe_start += pv->pe_align_offset;
}
}
+ if (pv->pe_start >= pv->size) {
+ log_error("Data area is beyond end of device %s!",
+ pv_dev_name(pv));
+ return 0;
+ }
+
if (!add_da
(NULL, &info->das, pv->pe_start << SECTOR_SHIFT, UINT64_C(0)))
return_0;
@@ -1598,6 +1606,7 @@ static struct metadata_area_ops _metadat
static int _text_pv_setup(const struct format_type *fmt,
uint64_t pe_start, uint32_t extent_count,
uint32_t extent_size, unsigned long data_alignment,
+ unsigned long data_alignment_offset,
int pvmetadatacopies,
uint64_t pvmetadatasize, struct dm_list *mdas,
struct physical_volume *pv, struct volume_group *vg)
@@ -1702,8 +1711,14 @@ static int _text_pv_setup(const struct f
"%lu sectors (requested %lu sectors)",
pv_dev_name(pv), pv->pe_align, data_alignment);
- if (pv->pe_start < pv->pe_align)
+ if (!pe_start)
+ set_pe_align_offset(pv, data_alignment_offset);
+
+ if (pv->pe_start < pv->pe_align) {
pv->pe_start = pv->pe_align;
+ if (pv->pe_align_offset)
+ pv->pe_start += pv->pe_align_offset;
+ }
if (extent_count)
pe_end = pe_start + extent_count * extent_size - 1;
Index: LVM2/lib/metadata/metadata-exported.h
===================================================================
--- LVM2.orig/lib/metadata/metadata-exported.h
+++ LVM2/lib/metadata/metadata-exported.h
@@ -184,6 +184,7 @@ struct physical_volume {
uint32_t pe_count;
uint32_t pe_alloc_count;
unsigned long pe_align;
+ unsigned long pe_align_offset;
struct dm_list segments; /* Ordered pv_segments covering complete PV */
struct dm_list tags;
@@ -398,6 +399,7 @@ pv_t *pv_create(const struct cmd_context
struct id *id,
uint64_t size,
unsigned long data_alignment,
+ unsigned long data_alignment_offset,
uint64_t pe_start,
uint32_t existing_extent_count,
uint32_t existing_extent_size,
Index: LVM2/lib/metadata/metadata.c
===================================================================
--- LVM2.orig/lib/metadata/metadata.c
+++ LVM2/lib/metadata/metadata.c
@@ -48,6 +48,7 @@ static struct physical_volume *_pv_creat
struct device *dev,
struct id *id, uint64_t size,
unsigned long data_alignment,
+ unsigned long data_alignment_offset,
uint64_t pe_start,
uint32_t existing_extent_count,
uint32_t existing_extent_size,
@@ -102,6 +103,25 @@ out:
return pv->pe_align;
}
+unsigned long set_pe_align_offset(struct physical_volume *pv,
+ unsigned long data_alignment_offset)
+{
+ if (pv->pe_align_offset)
+ goto out;
+
+ if (data_alignment_offset)
+ pv->pe_align_offset = data_alignment_offset;
+
+ if (!pv->dev)
+ goto out;
+
+ log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
+ dev_name(pv->dev), pv->pe_align_offset);
+
+out:
+ return pv->pe_align_offset;
+}
+
/**
* add_pv_to_vg - Add a physical volume to a volume group
* @vg - volume group to add to
@@ -163,7 +183,7 @@ int add_pv_to_vg(struct volume_group *vg
pv->pe_alloc_count = 0;
if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
- vg->extent_size, 0, 0UL, UINT64_C(0),
+ vg->extent_size, 0, 0, 0UL, UINT64_C(0),
&fid->metadata_areas, pv, vg)) {
log_error("Format-specific setup of physical volume '%s' "
"failed.", pv_name);
@@ -971,6 +991,7 @@ int vg_split_mdas(struct cmd_context *cm
* @id: PV UUID to use for initialization
* @size: size of the PV in sectors
* @data_alignment: requested alignment of data
+ * @data_alignment_offset: requested offset to aligned data
* @pe_start: physical extent start
* @existing_extent_count
* @existing_extent_size
@@ -989,13 +1010,15 @@ pv_t *pv_create(const struct cmd_context
struct device *dev,
struct id *id, uint64_t size,
unsigned long data_alignment,
+ unsigned long data_alignment_offset,
uint64_t pe_start,
uint32_t existing_extent_count,
uint32_t existing_extent_size,
int pvmetadatacopies,
uint64_t pvmetadatasize, struct dm_list *mdas)
{
- return _pv_create(cmd->fmt, dev, id, size, data_alignment, pe_start,
+ return _pv_create(cmd->fmt, dev, id, size,
+ data_alignment, data_alignment_offset, pe_start,
existing_extent_count,
existing_extent_size,
pvmetadatacopies,
@@ -1024,6 +1047,7 @@ static struct physical_volume *_alloc_pv
pv->pe_count = 0;
pv->pe_alloc_count = 0;
pv->pe_align = 0;
+ pv->pe_align_offset = 0;
pv->fmt = NULL;
pv->dev = dev;
@@ -1040,6 +1064,7 @@ static struct physical_volume *_pv_creat
struct device *dev,
struct id *id, uint64_t size,
unsigned long data_alignment,
+ unsigned long data_alignment_offset,
uint64_t pe_start,
uint32_t existing_extent_count,
uint32_t existing_extent_size,
@@ -1091,6 +1116,7 @@ static struct physical_volume *_pv_creat
if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
existing_extent_size, data_alignment,
+ data_alignment_offset,
pvmetadatacopies, pvmetadatasize, mdas,
pv, NULL)) {
log_error("%s: Format-specific setup of physical volume "
Index: LVM2/lib/metadata/metadata.h
===================================================================
--- LVM2.orig/lib/metadata/metadata.h
+++ LVM2/lib/metadata/metadata.h
@@ -213,6 +213,7 @@ struct format_handler {
int (*pv_setup) (const struct format_type * fmt,
uint64_t pe_start, uint32_t extent_count,
uint32_t extent_size, unsigned long data_alignment,
+ unsigned long data_alignment_offset,
int pvmetadatacopies,
uint64_t pvmetadatasize, struct dm_list * mdas,
struct physical_volume * pv, struct volume_group * vg);
@@ -267,6 +268,8 @@ struct format_handler {
* Utility functions
*/
unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment);
+unsigned long set_pe_align_offset(struct physical_volume *pv,
+ unsigned long data_alignment_offset);
int vg_validate(struct volume_group *vg);
int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv);
Index: LVM2/man/pvcreate.8.in
===================================================================
--- LVM2.orig/man/pvcreate.8.in
+++ LVM2/man/pvcreate.8.in
@@ -14,6 +14,7 @@ pvcreate \- initialize a disk or partiti
.RB [ \-\-metadatacopies #copies ]
.RB [ \-\-metadatasize size ]
.RB [ \-\-dataalignment alignment ]
+.RB [ \-\-dataalignmentoffset alignment_offset ]
.RB [ \-\-restorefile file ]
.RB [ \-\-setphysicalvolumesize size ]
.RB [ \-u | \-\-uuid uuid ]
@@ -91,13 +92,18 @@ The approximate amount of space to be se
(The size you specify may get rounded.)
.TP
.BR \-\-dataalignment " alignment"
-Align the offset of the start of the data to a multiple of this number.
+Align the start of the data to a multiple of this number.
You should also specify an appropriate \fBPhysicalExtentSize\fP when creating
the Volume Group with \fBvgcreate\fP.
.sp
To see the location of the first Physical Extent of an existing Physical Volume
use \fBpvs -o +pe_start\fP . It will be a multiple of the requested
-\fBdata_alignment\fP.
+\fBalignment\fP. In addition it may be shifted by \fBalignment_offset\fP from
+\fBdata_alignment_offset_detection\fP (if enabled in \fBlvm.conf\fP) or
+\fB--dataalignmentoffset\fP.
+.TP
+.BR \-\-dataalignmentoffset " alignment_offset"
+Shift the start of the data area by this additional \fBalignment_offset\fP.
.TP
.BR \-\-metadatacopies " copies"
The number of metadata areas to set aside on each PV. Currently
@@ -128,13 +134,21 @@ in the source). Use with care.
.TP
.BR \-\-setphysicalvolumesize " size"
Overrides the automatically-detected size of the PV. Use with care.
-.SH Example
+.SH EXAMPLES
Initialize partition #4 on the third SCSI disk and the entire fifth
SCSI disk for later use by LVM:
.sp
.B pvcreate /dev/sdc4 /dev/sde
.sp
+If the 2nd SCSI disk is a 4KB sector drive that compensates for windows
+partitioning (sector 7 is the lowest aligned logical block, the 4KB
+sectors start at LBA -1, and consequently sector 63 is aligned on a 4KB
+boundary) manually account for this when initializing for use by LVM:
+.sp
+.B pvcreate --dataalignmentoffset 7s /dev/sdb
+.sp
.SH SEE ALSO
+.BR lvm.conf (5),
.BR lvm (8),
.BR vgcreate (8),
.BR vgextend (8),
Index: LVM2/tools/args.h
===================================================================
--- LVM2.orig/tools/args.h
+++ LVM2/tools/args.h
@@ -59,6 +59,7 @@ arg(nameprefixes_ARG, '\0', "nameprefixe
arg(unquoted_ARG, '\0', "unquoted", NULL, 0)
arg(rows_ARG, '\0', "rows", NULL, 0)
arg(dataalignment_ARG, '\0', "dataalignment", size_kb_arg, 0)
+arg(dataalignmentoffset_ARG, '\0', "dataalignmentoffset", size_kb_arg, 0)
arg(virtualoriginsize_ARG, '\0', "virtualoriginsize", size_mb_arg, 0)
arg(virtualsize_ARG, '\0', "virtualsize", size_mb_arg, 0)
Index: LVM2/tools/commands.h
===================================================================
--- LVM2.orig/tools/commands.h
+++ LVM2/tools/commands.h
@@ -470,6 +470,7 @@ xx(pvcreate,
"\t[--metadatacopies #copies]" "\n"
"\t[--metadatasize MetadataSize[bBsSkKmMgGtTpPeE]]" "\n"
"\t[--dataalignment Alignment[bBsSkKmMgGtTpPeE]]" "\n"
+ "\t[--dataalignmentoffset AlignmentOffset[bBsSkKmMgGtTpPeE]]" "\n"
"\t[--setphysicalvolumesize PhysicalVolumeSize[bBsSkKmMgGtTpPeE]" "\n"
"\t[-t|--test] " "\n"
"\t[-u|--uuid uuid] " "\n"
@@ -479,9 +480,9 @@ xx(pvcreate,
"\t[--version] " "\n"
"\tPhysicalVolume [PhysicalVolume...]\n",
- dataalignment_ARG, force_ARG, test_ARG, labelsector_ARG, metadatatype_ARG,
- metadatacopies_ARG, metadatasize_ARG, physicalvolumesize_ARG,
- restorefile_ARG, uuidstr_ARG, yes_ARG, zero_ARG)
+ dataalignment_ARG, dataalignmentoffset_ARG, force_ARG, test_ARG,
+ labelsector_ARG, metadatatype_ARG, metadatacopies_ARG, metadatasize_ARG,
+ physicalvolumesize_ARG, restorefile_ARG, uuidstr_ARG, yes_ARG, zero_ARG)
xx(pvdata,
"Display the on-disk metadata for physical volume(s)",
Index: LVM2/tools/pvcreate.c
===================================================================
--- LVM2.orig/tools/pvcreate.c
+++ LVM2/tools/pvcreate.c
@@ -20,6 +20,7 @@ struct pvcreate_params {
int zero;
uint64_t size;
uint64_t data_alignment;
+ uint64_t data_alignment_offset;
int pvmetadatacopies;
uint64_t pvmetadatasize;
int64_t labelsector;
@@ -203,8 +204,8 @@ static int pvcreate_single(struct cmd_co
dm_list_init(&mdas);
if (!(pv = pv_create(cmd, dev, pp->idp, pp->size,
- pp->data_alignment, pp->pe_start,
- pp->extent_count, pp->extent_size,
+ pp->data_alignment, pp->data_alignment_offset,
+ pp->pe_start, pp->extent_count, pp->extent_size,
pp->pvmetadatacopies,
pp->pvmetadatasize,&mdas))) {
log_error("Failed to setup physical volume \"%s\"", pv_name);
@@ -377,6 +378,24 @@ static int pvcreate_validate_params(stru
pp->data_alignment = 0;
}
+ if (arg_sign_value(cmd, dataalignmentoffset_ARG, 0) == SIGN_MINUS) {
+ log_error("Physical volume data alignment offset may not be negative");
+ return 0;
+ }
+ pp->data_alignment_offset = arg_uint64_value(cmd, dataalignmentoffset_ARG, UINT64_C(0));
+
+ if (pp->data_alignment_offset > ULONG_MAX) {
+ log_error("Physical volume data alignment offset is too big.");
+ return 0;
+ }
+
+ if (pp->data_alignment_offset && pp->pe_start) {
+ log_warn("WARNING: Ignoring data alignment offset %" PRIu64
+ " incompatible with --restorefile value (%"
+ PRIu64").", pp->data_alignment_offset, pp->pe_start);
+ pp->data_alignment_offset = 0;
+ }
+
if (arg_sign_value(cmd, metadatasize_ARG, 0) == SIGN_MINUS) {
log_error("Metadata size may not be negative");
return 0;
Index: LVM2/tools/vgconvert.c
===================================================================
--- LVM2.orig/tools/vgconvert.c
+++ LVM2/tools/vgconvert.c
@@ -123,7 +123,7 @@ static int vgconvert_single(struct cmd_c
dm_list_init(&mdas);
if (!(pv = pv_create(cmd, pv_dev(existing_pv),
- &existing_pv->id, size, 0,
+ &existing_pv->id, size, 0, 0,
pe_start, pv_pe_count(existing_pv),
pv_pe_size(existing_pv), pvmetadatacopies,
pvmetadatasize, &mdas))) {
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/6] Add --dataalignmentoffset to pvcreate to shift start of aligned data area
2009-07-25 22:14 ` [PATCH 1/6] Add --dataalignmentoffset to pvcreate to shift start of aligned data area Mike Snitzer
@ 2009-07-26 4:20 ` Mike Snitzer
0 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2009-07-26 4:20 UTC (permalink / raw)
To: lvm-devel
[patch updated in response to the recent liblvm pvcreate changes]
Implement pvcreate --dataalignmentoffset to shift the start of
the aligned data area.
Adds pe_align_offset to 'struct physical_volume'; it is initialized with
set_pe_align_offset(). After pe_start is established, pe_align_offset is
added to it.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
WHATS_NEW | 1 +
lib/format1/format1.c | 1 +
lib/format_pool/format_pool.c | 1 +
lib/format_text/archiver.c | 2 +-
lib/format_text/format-text.c | 17 ++++++++++++++++-
lib/metadata/metadata-exported.h | 3 +++
lib/metadata/metadata.c | 30 +++++++++++++++++++++++++++---
lib/metadata/metadata.h | 3 +++
man/pvcreate.8.in | 20 +++++++++++++++++---
tools/args.h | 1 +
tools/commands.h | 7 ++++---
tools/pvcreate.c | 18 ++++++++++++++++++
tools/vgconvert.c | 2 +-
13 files changed, 94 insertions(+), 12 deletions(-)
Index: LVM2/WHATS_NEW
===================================================================
--- LVM2.orig/WHATS_NEW
+++ LVM2/WHATS_NEW
@@ -1,5 +1,6 @@
Version 2.02.50 -
================================
+ Add --dataalignmentoffset to pvcreate to shift start of aligned data area.
Add global/wait_for_locks to lvm.conf so blocking for locks can be disabled.
All LV locks are non-blocking so remove LCK_NONBLOCK from separate macros.
Fix race condition with vgcreate and vgextend on same device (2.02.49).
Index: LVM2/lib/format1/format1.c
===================================================================
--- LVM2.orig/lib/format1/format1.c
+++ LVM2/lib/format1/format1.c
@@ -296,6 +296,7 @@ static int _format1_pv_setup(const struc
uint64_t pe_start, uint32_t extent_count,
uint32_t extent_size,
unsigned long data_alignment __attribute((unused)),
+ unsigned long data_alignment_offset __attribute((unused)),
int pvmetadatacopies __attribute((unused)),
uint64_t pvmetadatasize __attribute((unused)), struct dm_list *mdas __attribute((unused)),
struct physical_volume *pv, struct volume_group *vg __attribute((unused)))
Index: LVM2/lib/format_pool/format_pool.c
===================================================================
--- LVM2.orig/lib/format_pool/format_pool.c
+++ LVM2/lib/format_pool/format_pool.c
@@ -192,6 +192,7 @@ static int _pool_pv_setup(const struct f
uint32_t extent_count __attribute((unused)),
uint32_t extent_size __attribute((unused)),
unsigned long data_alignment __attribute((unused)),
+ unsigned long data_alignment_offset __attribute((unused)),
int pvmetadatacopies __attribute((unused)),
uint64_t pvmetadatasize __attribute((unused)),
struct dm_list *mdas __attribute((unused)),
Index: LVM2/lib/format_text/archiver.c
===================================================================
--- LVM2.orig/lib/format_text/archiver.c
+++ LVM2/lib/format_text/archiver.c
@@ -320,7 +320,7 @@ int backup_restore_vg(struct cmd_context
return 0;
}
if (!vg->fid->fmt->ops->
- pv_setup(vg->fid->fmt, UINT64_C(0), 0, 0, 0, 0UL,
+ pv_setup(vg->fid->fmt, UINT64_C(0), 0, 0, 0, 0, 0UL,
UINT64_C(0), &vg->fid->metadata_areas, pv, vg)) {
log_error("Format-specific setup for %s failed",
pv_dev_name(pv));
Index: LVM2/lib/format_text/format-text.c
===================================================================
--- LVM2.orig/lib/format_text/format-text.c
+++ LVM2/lib/format_text/format-text.c
@@ -1382,8 +1382,16 @@ static int _text_pv_write(const struct f
adjustment = pv->pe_start % pv->pe_align;
if (adjustment)
pv->pe_start += pv->pe_align - adjustment;
+ if (pv->pe_align_offset)
+ pv->pe_start += pv->pe_align_offset;
}
}
+ if (pv->pe_start >= pv->size) {
+ log_error("Data area is beyond end of device %s!",
+ pv_dev_name(pv));
+ return 0;
+ }
+
if (!add_da
(NULL, &info->das, pv->pe_start << SECTOR_SHIFT, UINT64_C(0)))
return_0;
@@ -1598,6 +1606,7 @@ static struct metadata_area_ops _metadat
static int _text_pv_setup(const struct format_type *fmt,
uint64_t pe_start, uint32_t extent_count,
uint32_t extent_size, unsigned long data_alignment,
+ unsigned long data_alignment_offset,
int pvmetadatacopies,
uint64_t pvmetadatasize, struct dm_list *mdas,
struct physical_volume *pv, struct volume_group *vg)
@@ -1702,8 +1711,14 @@ static int _text_pv_setup(const struct f
"%lu sectors (requested %lu sectors)",
pv_dev_name(pv), pv->pe_align, data_alignment);
- if (pv->pe_start < pv->pe_align)
+ if (!pe_start)
+ set_pe_align_offset(pv, data_alignment_offset);
+
+ if (pv->pe_start < pv->pe_align) {
pv->pe_start = pv->pe_align;
+ if (pv->pe_align_offset)
+ pv->pe_start += pv->pe_align_offset;
+ }
if (extent_count)
pe_end = pe_start + extent_count * extent_size - 1;
Index: LVM2/lib/metadata/metadata-exported.h
===================================================================
--- LVM2.orig/lib/metadata/metadata-exported.h
+++ LVM2/lib/metadata/metadata-exported.h
@@ -184,6 +184,7 @@ struct physical_volume {
uint32_t pe_count;
uint32_t pe_alloc_count;
unsigned long pe_align;
+ unsigned long pe_align_offset;
struct dm_list segments; /* Ordered pv_segments covering complete PV */
struct dm_list tags;
@@ -341,6 +342,7 @@ struct pvcreate_params {
int zero;
uint64_t size;
uint64_t data_alignment;
+ uint64_t data_alignment_offset;
int pvmetadatacopies;
uint64_t pvmetadatasize;
int64_t labelsector;
@@ -418,6 +420,7 @@ pv_t *pv_create(const struct cmd_context
struct id *id,
uint64_t size,
unsigned long data_alignment,
+ unsigned long data_alignment_offset,
uint64_t pe_start,
uint32_t existing_extent_count,
uint32_t existing_extent_size,
Index: LVM2/lib/metadata/metadata.c
===================================================================
--- LVM2.orig/lib/metadata/metadata.c
+++ LVM2/lib/metadata/metadata.c
@@ -96,6 +96,25 @@ out:
return pv->pe_align;
}
+unsigned long set_pe_align_offset(struct physical_volume *pv,
+ unsigned long data_alignment_offset)
+{
+ if (pv->pe_align_offset)
+ goto out;
+
+ if (data_alignment_offset)
+ pv->pe_align_offset = data_alignment_offset;
+
+ if (!pv->dev)
+ goto out;
+
+ log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
+ dev_name(pv->dev), pv->pe_align_offset);
+
+out:
+ return pv->pe_align_offset;
+}
+
/**
* add_pv_to_vg - Add a physical volume to a volume group
* @vg - volume group to add to
@@ -157,7 +176,7 @@ int add_pv_to_vg(struct volume_group *vg
pv->pe_alloc_count = 0;
if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
- vg->extent_size, 0, 0UL, UINT64_C(0),
+ vg->extent_size, 0, 0, 0UL, UINT64_C(0),
&fid->metadata_areas, pv, vg)) {
log_error("Format-specific setup of physical volume '%s' "
"failed.", pv_name);
@@ -1117,6 +1136,7 @@ static void fill_default_pvcreate_params
pp->zero = 0;
pp->size = 0;
pp->data_alignment = UINT64_C(0);
+ pp->data_alignment_offset = UINT64_C(0);
pp->pvmetadatacopies = DEFAULT_PVMETADATACOPIES;
pp->pvmetadatasize = DEFAULT_PVMETADATASIZE;
pp->labelsector = DEFAULT_LABELSECTOR;
@@ -1175,8 +1195,8 @@ pv_t * pvcreate_single(struct cmd_contex
dm_list_init(&mdas);
if (!(pv = pv_create(cmd, dev, pp->idp, pp->size,
- pp->data_alignment, pp->pe_start,
- pp->extent_count, pp->extent_size,
+ pp->data_alignment, pp->data_alignment_offset,
+ pp->pe_start, pp->extent_count, pp->extent_size,
pp->pvmetadatacopies,
pp->pvmetadatasize,&mdas))) {
log_error("Failed to setup physical volume \"%s\"", pv_name);
@@ -1245,6 +1265,7 @@ static struct physical_volume *_alloc_pv
pv->pe_count = 0;
pv->pe_alloc_count = 0;
pv->pe_align = 0;
+ pv->pe_align_offset = 0;
pv->fmt = NULL;
pv->dev = dev;
@@ -1263,6 +1284,7 @@ static struct physical_volume *_alloc_pv
* @dev: PV device to initialize
* @size: size of the PV in sectors
* @data_alignment: requested alignment of data
+ * @data_alignment_offset: requested offset to aligned data
* @pe_start: physical extent start
* @existing_extent_count
* @existing_extent_size
@@ -1281,6 +1303,7 @@ struct physical_volume *pv_create(const
struct device *dev,
struct id *id, uint64_t size,
unsigned long data_alignment,
+ unsigned long data_alignment_offset,
uint64_t pe_start,
uint32_t existing_extent_count,
uint32_t existing_extent_size,
@@ -1333,6 +1356,7 @@ struct physical_volume *pv_create(const
if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
existing_extent_size, data_alignment,
+ data_alignment_offset,
pvmetadatacopies, pvmetadatasize, mdas,
pv, NULL)) {
log_error("%s: Format-specific setup of physical volume "
Index: LVM2/lib/metadata/metadata.h
===================================================================
--- LVM2.orig/lib/metadata/metadata.h
+++ LVM2/lib/metadata/metadata.h
@@ -213,6 +213,7 @@ struct format_handler {
int (*pv_setup) (const struct format_type * fmt,
uint64_t pe_start, uint32_t extent_count,
uint32_t extent_size, unsigned long data_alignment,
+ unsigned long data_alignment_offset,
int pvmetadatacopies,
uint64_t pvmetadatasize, struct dm_list * mdas,
struct physical_volume * pv, struct volume_group * vg);
@@ -267,6 +268,8 @@ struct format_handler {
* Utility functions
*/
unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment);
+unsigned long set_pe_align_offset(struct physical_volume *pv,
+ unsigned long data_alignment_offset);
int vg_validate(struct volume_group *vg);
int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv);
Index: LVM2/man/pvcreate.8.in
===================================================================
--- LVM2.orig/man/pvcreate.8.in
+++ LVM2/man/pvcreate.8.in
@@ -14,6 +14,7 @@ pvcreate \- initialize a disk or partiti
.RB [ \-\-metadatacopies #copies ]
.RB [ \-\-metadatasize size ]
.RB [ \-\-dataalignment alignment ]
+.RB [ \-\-dataalignmentoffset alignment_offset ]
.RB [ \-\-restorefile file ]
.RB [ \-\-setphysicalvolumesize size ]
.RB [ \-u | \-\-uuid uuid ]
@@ -91,13 +92,18 @@ The approximate amount of space to be se
(The size you specify may get rounded.)
.TP
.BR \-\-dataalignment " alignment"
-Align the offset of the start of the data to a multiple of this number.
+Align the start of the data to a multiple of this number.
You should also specify an appropriate \fBPhysicalExtentSize\fP when creating
the Volume Group with \fBvgcreate\fP.
.sp
To see the location of the first Physical Extent of an existing Physical Volume
use \fBpvs -o +pe_start\fP . It will be a multiple of the requested
-\fBdata_alignment\fP.
+\fBalignment\fP. In addition it may be shifted by \fBalignment_offset\fP from
+\fBdata_alignment_offset_detection\fP (if enabled in \fBlvm.conf\fP) or
+\fB--dataalignmentoffset\fP.
+.TP
+.BR \-\-dataalignmentoffset " alignment_offset"
+Shift the start of the data area by this additional \fBalignment_offset\fP.
.TP
.BR \-\-metadatacopies " copies"
The number of metadata areas to set aside on each PV. Currently
@@ -128,13 +134,21 @@ in the source). Use with care.
.TP
.BR \-\-setphysicalvolumesize " size"
Overrides the automatically-detected size of the PV. Use with care.
-.SH Example
+.SH EXAMPLES
Initialize partition #4 on the third SCSI disk and the entire fifth
SCSI disk for later use by LVM:
.sp
.B pvcreate /dev/sdc4 /dev/sde
.sp
+If the 2nd SCSI disk is a 4KB sector drive that compensates for windows
+partitioning (sector 7 is the lowest aligned logical block, the 4KB
+sectors start at LBA -1, and consequently sector 63 is aligned on a 4KB
+boundary) manually account for this when initializing for use by LVM:
+.sp
+.B pvcreate --dataalignmentoffset 7s /dev/sdb
+.sp
.SH SEE ALSO
+.BR lvm.conf (5),
.BR lvm (8),
.BR vgcreate (8),
.BR vgextend (8),
Index: LVM2/tools/args.h
===================================================================
--- LVM2.orig/tools/args.h
+++ LVM2/tools/args.h
@@ -59,6 +59,7 @@ arg(nameprefixes_ARG, '\0', "nameprefixe
arg(unquoted_ARG, '\0', "unquoted", NULL, 0)
arg(rows_ARG, '\0', "rows", NULL, 0)
arg(dataalignment_ARG, '\0', "dataalignment", size_kb_arg, 0)
+arg(dataalignmentoffset_ARG, '\0', "dataalignmentoffset", size_kb_arg, 0)
arg(virtualoriginsize_ARG, '\0', "virtualoriginsize", size_mb_arg, 0)
arg(virtualsize_ARG, '\0', "virtualsize", size_mb_arg, 0)
Index: LVM2/tools/commands.h
===================================================================
--- LVM2.orig/tools/commands.h
+++ LVM2/tools/commands.h
@@ -470,6 +470,7 @@ xx(pvcreate,
"\t[--metadatacopies #copies]" "\n"
"\t[--metadatasize MetadataSize[bBsSkKmMgGtTpPeE]]" "\n"
"\t[--dataalignment Alignment[bBsSkKmMgGtTpPeE]]" "\n"
+ "\t[--dataalignmentoffset AlignmentOffset[bBsSkKmMgGtTpPeE]]" "\n"
"\t[--setphysicalvolumesize PhysicalVolumeSize[bBsSkKmMgGtTpPeE]" "\n"
"\t[-t|--test] " "\n"
"\t[-u|--uuid uuid] " "\n"
@@ -479,9 +480,9 @@ xx(pvcreate,
"\t[--version] " "\n"
"\tPhysicalVolume [PhysicalVolume...]\n",
- dataalignment_ARG, force_ARG, test_ARG, labelsector_ARG, metadatatype_ARG,
- metadatacopies_ARG, metadatasize_ARG, physicalvolumesize_ARG,
- restorefile_ARG, uuidstr_ARG, yes_ARG, zero_ARG)
+ dataalignment_ARG, dataalignmentoffset_ARG, force_ARG, test_ARG,
+ labelsector_ARG, metadatatype_ARG, metadatacopies_ARG, metadatasize_ARG,
+ physicalvolumesize_ARG, restorefile_ARG, uuidstr_ARG, yes_ARG, zero_ARG)
xx(pvdata,
"Display the on-disk metadata for physical volume(s)",
Index: LVM2/tools/pvcreate.c
===================================================================
--- LVM2.orig/tools/pvcreate.c
+++ LVM2/tools/pvcreate.c
@@ -140,6 +140,24 @@ static int pvcreate_validate_params(stru
pp->data_alignment = 0;
}
+ if (arg_sign_value(cmd, dataalignmentoffset_ARG, 0) == SIGN_MINUS) {
+ log_error("Physical volume data alignment offset may not be negative");
+ return 0;
+ }
+ pp->data_alignment_offset = arg_uint64_value(cmd, dataalignmentoffset_ARG, UINT64_C(0));
+
+ if (pp->data_alignment_offset > ULONG_MAX) {
+ log_error("Physical volume data alignment offset is too big.");
+ return 0;
+ }
+
+ if (pp->data_alignment_offset && pp->pe_start) {
+ log_warn("WARNING: Ignoring data alignment offset %" PRIu64
+ " incompatible with --restorefile value (%"
+ PRIu64").", pp->data_alignment_offset, pp->pe_start);
+ pp->data_alignment_offset = 0;
+ }
+
if (arg_sign_value(cmd, metadatasize_ARG, 0) == SIGN_MINUS) {
log_error("Metadata size may not be negative");
return 0;
Index: LVM2/tools/vgconvert.c
===================================================================
--- LVM2.orig/tools/vgconvert.c
+++ LVM2/tools/vgconvert.c
@@ -123,7 +123,7 @@ static int vgconvert_single(struct cmd_c
dm_list_init(&mdas);
if (!(pv = pv_create(cmd, pv_dev(existing_pv),
- &existing_pv->id, size, 0,
+ &existing_pv->id, size, 0, 0,
pe_start, pv_pe_count(existing_pv),
pv_pe_size(existing_pv), pvmetadatacopies,
pvmetadatasize, &mdas))) {
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/6] Add devices/data_alignment_offset_detection to lvm.conf.
2009-07-25 22:14 [PATCH 0/6 v7] LVM2 topology support Mike Snitzer
2009-07-25 22:14 ` [PATCH 1/6] Add --dataalignmentoffset to pvcreate to shift start of aligned data area Mike Snitzer
@ 2009-07-25 22:14 ` Mike Snitzer
2009-07-25 22:14 ` [PATCH 3/6] Add devices/data_alignment_detection " Mike Snitzer
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2009-07-25 22:14 UTC (permalink / raw)
To: lvm-devel
If the pvcreate --dataalignmentoffset option is not specified, the start
of a PV's aligned data area will be padded with the associated
'alignment_offset' exposed in sysfs (unless
devices/data_alignment_offset_detection is disabled in lvm.conf).
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
WHATS_NEW | 1
doc/example.conf | 9 +++++
lib/config/defaults.h | 1
lib/device/device.c | 71 ++++++++++++++++++++++++++++++++++++++++++
lib/device/device.h | 3 +
lib/format_text/format-text.c | 8 +++-
lib/metadata/metadata.c | 8 ++++
man/lvm.conf.5.in | 9 ++++-
8 files changed, 107 insertions(+), 3 deletions(-)
Index: LVM2/WHATS_NEW
===================================================================
--- LVM2.orig/WHATS_NEW
+++ LVM2/WHATS_NEW
@@ -1,5 +1,6 @@
Version 2.02.50 -
================================
+ Add devices/data_alignment_offset_detection to lvm.conf.
Add --dataalignmentoffset to pvcreate to shift start of aligned data area.
Add global/wait_for_locks to lvm.conf so blocking for locks can be disabled.
All LV locks are non-blocking so remove LCK_NONBLOCK from separate macros.
Index: LVM2/doc/example.conf
===================================================================
--- LVM2.orig/doc/example.conf
+++ LVM2/doc/example.conf
@@ -104,6 +104,15 @@ devices {
# Set to 0 for the default alignment of 64KB or page size, if larger.
data_alignment = 0
+ # By default, the start of a PV's aligned data area will be padded with
+ # the 'alignment_offset' exposed in sysfs. This offset is often 0 but
+ # may be non-zero; e.g.: certain 4KB sector drives that compensate for
+ # windows partitioning will have an alignment_offset of 3584 bytes
+ # (sector 7 is the lowest aligned logical block, the 4KB sectors start
+ # at LBA -1, and consequently sector 63 is aligned on a 4KB boundary).
+ # 1 enables; 0 disables.
+ data_alignment_offset_detection = 1
+
# If, while scanning the system for PVs, LVM2 encounters a device-mapper
# device that has its I/O suspended, it waits for it to become accessible.
# Set this to 1 to skip such devices. This should only be needed
Index: LVM2/lib/config/defaults.h
===================================================================
--- LVM2.orig/lib/config/defaults.h
+++ LVM2/lib/config/defaults.h
@@ -34,6 +34,7 @@
#define DEFAULT_MD_COMPONENT_DETECTION 1
#define DEFAULT_MD_CHUNK_ALIGNMENT 1
#define DEFAULT_IGNORE_SUSPENDED_DEVICES 1
+#define DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION 1
#define DEFAULT_LOCK_DIR "/var/lock/lvm"
#define DEFAULT_LOCKING_LIB "liblvm2clusterlock.so"
Index: LVM2/lib/device/device.c
===================================================================
--- LVM2.orig/lib/device/device.c
+++ LVM2/lib/device/device.c
@@ -282,3 +282,74 @@ int _get_partition_type(struct dev_mgr *
return 0;
}
#endif
+
+#ifdef linux
+
+static unsigned long _dev_topology_attribute(const char *attribute,
+ const char *sysfs_dir,
+ struct device *dev)
+{
+ char path[PATH_MAX+1], buffer[64];
+ FILE *fp;
+ struct stat info;
+ unsigned long result = 0UL;
+
+ if (!attribute || !*attribute)
+ return_0;
+
+ if (!sysfs_dir || !*sysfs_dir)
+ return_0;
+
+ if (dm_snprintf(path, PATH_MAX, "%s/dev/block/%d:%d/%s",
+ sysfs_dir, (int)MAJOR(dev->dev), (int)MINOR(dev->dev),
+ attribute) < 0) {
+ log_error("dm_snprintf %s failed", attribute);
+ return 0;
+ }
+
+ /* check if the desired sysfs attribute exists */
+ if (stat(path, &info) < 0)
+ return 0;
+
+ if (!(fp = fopen(path, "r"))) {
+ log_sys_error("fopen", path);
+ return 0;
+ }
+
+ if (!fgets(buffer, sizeof(buffer), fp)) {
+ log_sys_error("fgets", path);
+ goto out;
+ }
+
+ if (sscanf(buffer, "%lu", &result) != 1) {
+ log_error("sysfs file %s not in expected format: %s", path,
+ buffer);
+ goto out;
+ }
+
+ log_very_verbose("Device %s %s is %lu bytes.",
+ dev_name(dev), attribute, result);
+
+out:
+ if (fclose(fp))
+ log_sys_error("fclose", path);
+
+ return result >> SECTOR_SHIFT;
+}
+
+unsigned long dev_alignment_offset(const char *sysfs_dir,
+ struct device *dev)
+{
+ return _dev_topology_attribute("alignment_offset",
+ sysfs_dir, dev);
+}
+
+#else
+
+unsigned long dev_alignment_offset(const char *sysfs_dir,
+ struct device *dev)
+{
+ return 0UL;
+}
+
+#endif
Index: LVM2/lib/device/device.h
===================================================================
--- LVM2.orig/lib/device/device.h
+++ LVM2/lib/device/device.h
@@ -100,4 +100,7 @@ unsigned long dev_md_stripe_width(const
int is_partitioned_dev(struct device *dev);
+unsigned long dev_alignment_offset(const char *sysfs_dir,
+ struct device *dev);
+
#endif
Index: LVM2/lib/format_text/format-text.c
===================================================================
--- LVM2.orig/lib/format_text/format-text.c
+++ LVM2/lib/format_text/format-text.c
@@ -1711,8 +1711,12 @@ static int _text_pv_setup(const struct f
"%lu sectors (requested %lu sectors)",
pv_dev_name(pv), pv->pe_align, data_alignment);
- if (!pe_start)
- set_pe_align_offset(pv, data_alignment_offset);
+ if (!pe_start &&
+ (set_pe_align_offset(pv, data_alignment_offset) != data_alignment_offset) &&
+ data_alignment_offset)
+ log_warn("WARNING: %s: Overriding data alignment offset to "
+ "%lu sectors (requested %lu sectors)",
+ pv_dev_name(pv), pv->pe_align_offset, data_alignment_offset);
if (pv->pe_start < pv->pe_align) {
pv->pe_start = pv->pe_align;
Index: LVM2/lib/metadata/metadata.c
===================================================================
--- LVM2.orig/lib/metadata/metadata.c
+++ LVM2/lib/metadata/metadata.c
@@ -115,6 +115,14 @@ unsigned long set_pe_align_offset(struct
if (!pv->dev)
goto out;
+ if (find_config_tree_bool(pv->fmt->cmd,
+ "devices/data_alignment_offset_detection",
+ DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION))
+ pv->pe_align_offset =
+ MAX(pv->pe_align_offset,
+ dev_alignment_offset(pv->fmt->cmd->sysfs_dir,
+ pv->dev));
+
log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
dev_name(pv->dev), pv->pe_align_offset);
Index: LVM2/man/lvm.conf.5.in
===================================================================
--- LVM2.orig/man/lvm.conf.5.in
+++ LVM2/man/lvm.conf.5.in
@@ -142,10 +142,17 @@ when creating a new Physical Volume usin
If a Physical Volume is placed directly upon an md device and
\fBmd_chunk_alignment\fP is enabled this parameter is ignored.
Set to 0 to use the default alignment of 64KB or the page size, if larger.
+.IP
+\fBdata_alignment_offset_detection\fP \(em If set to 1, and your kernel
+provides topology information in sysfs for the Physical Volume, the
+start of the aligned data area of the Physical Volume will be padded
+with the alignment_offset exposed in sysfs.
.sp
To see the location of the first Physical Extent of an existing Physical Volume
use \fBpvs -o +pe_start\fP . It will be a multiple of the requested
-\fBdata_alignment\fP.
+\fBdata_alignment\fP plus the alignment_offset from
+\fBdata_alignment_offset_detection\fP (if enabled) or the pvcreate
+commandline.
.TP
\fBlog\fP \(em Default log settings
.IP
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 3/6] Add devices/data_alignment_detection to lvm.conf.
2009-07-25 22:14 [PATCH 0/6 v7] LVM2 topology support Mike Snitzer
2009-07-25 22:14 ` [PATCH 1/6] Add --dataalignmentoffset to pvcreate to shift start of aligned data area Mike Snitzer
2009-07-25 22:14 ` [PATCH 2/6] Add devices/data_alignment_offset_detection to lvm.conf Mike Snitzer
@ 2009-07-25 22:14 ` Mike Snitzer
2009-07-25 22:14 ` [PATCH 4/6] Improve ability to lookup primary device associated with partition Mike Snitzer
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2009-07-25 22:14 UTC (permalink / raw)
To: lvm-devel
Adds 'data_alignment_detection' config option to the devices section of
lvm.conf. If your kernel provides topology information in sysfs (linux
>= 2.6.31) for the Physical Volume, the start of data area will be
aligned on a multiple of the 'minimum_io_size' or 'optimal_io_size'
exposed in sysfs.
minimum_io_size is used if optimal_io_size is undefined (0). If both
md_chunk_alignment and data_alignment_detection are enabled the result
of data_alignment_detection is used.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
WHATS_NEW | 1
doc/example.conf | 12 ++++++-
lib/config/defaults.h | 1
lib/device/device.c | 75 ++++++++++++++++++++++++++++++++++++++++++++----
lib/device/device.h | 6 +++
lib/metadata/metadata.c | 19 ++++++++++++
man/lvm.conf.5.in | 13 +++++++-
7 files changed, 118 insertions(+), 9 deletions(-)
Index: LVM2/WHATS_NEW
===================================================================
--- LVM2.orig/WHATS_NEW
+++ LVM2/WHATS_NEW
@@ -1,5 +1,6 @@
Version 2.02.50 -
================================
+ Add devices/data_alignment_detection to lvm.conf.
Add devices/data_alignment_offset_detection to lvm.conf.
Add --dataalignmentoffset to pvcreate to shift start of aligned data area.
Add global/wait_for_locks to lvm.conf so blocking for locks can be disabled.
Index: LVM2/doc/example.conf
===================================================================
--- LVM2.orig/doc/example.conf
+++ LVM2/doc/example.conf
@@ -98,9 +98,17 @@ devices {
# 1 enables; 0 disables.
md_chunk_alignment = 1
+ # By default, the start of a PV's data area will be aligned with
+ # the 'minimum_io_size' or 'optimal_io_size' exposed in sysfs.
+ # minimum_io_size is used if optimal_io_size is undefined (0).
+ # If md_chunk_alignment is enabled, that detects the optimal_io_size.
+ # This setting takes precedence over md_chunk_alignment.
+ # 1 enables; 0 disables.
+ data_alignment_detection = 1
+
# Alignment (in KB) of start of data area when creating a new PV.
- # If a PV is placed directly upon an md device and md_chunk_alignment is
- # enabled this parameter is ignored.
+ # If a PV is placed directly upon an md device and md_chunk_alignment or
+ # data_alignment_detection is enabled this parameter is ignored.
# Set to 0 for the default alignment of 64KB or page size, if larger.
data_alignment = 0
Index: LVM2/lib/config/defaults.h
===================================================================
--- LVM2.orig/lib/config/defaults.h
+++ LVM2/lib/config/defaults.h
@@ -35,6 +35,7 @@
#define DEFAULT_MD_CHUNK_ALIGNMENT 1
#define DEFAULT_IGNORE_SUSPENDED_DEVICES 1
#define DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION 1
+#define DEFAULT_DATA_ALIGNMENT_DETECTION 1
#define DEFAULT_LOCK_DIR "/var/lock/lvm"
#define DEFAULT_LOCKING_LIB "liblvm2clusterlock.so"
Index: LVM2/lib/device/device.c
===================================================================
--- LVM2.orig/lib/device/device.c
+++ LVM2/lib/device/device.c
@@ -285,13 +285,36 @@ int _get_partition_type(struct dev_mgr *
#ifdef linux
+static int _primary_dev(const char *sysfs_dir,
+ struct device *dev, dev_t *result)
+{
+ char path[PATH_MAX+1];
+ struct stat info;
+
+ /* check if dev is a partition */
+ if (dm_snprintf(path, PATH_MAX, "%s/dev/block/%d:%d/partition",
+ sysfs_dir, (int)MAJOR(dev->dev), (int)MINOR(dev->dev)) < 0) {
+ log_error("dm_snprintf partition failed");
+ return 0;
+ }
+
+ if (stat(path, &info) < 0)
+ return 0;
+
+ *result = dev->dev -
+ (MINOR(dev->dev) % max_partitions(MAJOR(dev->dev)));
+ return 1;
+}
+
static unsigned long _dev_topology_attribute(const char *attribute,
const char *sysfs_dir,
struct device *dev)
{
+ const char *sysfs_fmt_str = "%s/dev/block/%d:%d/%s";
char path[PATH_MAX+1], buffer[64];
FILE *fp;
struct stat info;
+ dev_t uninitialized_var(primary);
unsigned long result = 0UL;
if (!attribute || !*attribute)
@@ -300,16 +323,32 @@ static unsigned long _dev_topology_attri
if (!sysfs_dir || !*sysfs_dir)
return_0;
- if (dm_snprintf(path, PATH_MAX, "%s/dev/block/%d:%d/%s",
- sysfs_dir, (int)MAJOR(dev->dev), (int)MINOR(dev->dev),
+ if (dm_snprintf(path, PATH_MAX, sysfs_fmt_str, sysfs_dir,
+ (int)MAJOR(dev->dev), (int)MINOR(dev->dev),
attribute) < 0) {
log_error("dm_snprintf %s failed", attribute);
return 0;
}
- /* check if the desired sysfs attribute exists */
- if (stat(path, &info) < 0)
- return 0;
+ /*
+ * check if the desired sysfs attribute exists
+ * - if not: either the kernel doesn't have topology support
+ * or the device could be a partition
+ */
+ if (stat(path, &info) < 0) {
+ if (!_primary_dev(sysfs_dir, dev, &primary))
+ return 0;
+
+ /* get attribute from partition's primary device */
+ if (dm_snprintf(path, PATH_MAX, sysfs_fmt_str, sysfs_dir,
+ (int)MAJOR(primary), (int)MINOR(primary),
+ attribute) < 0) {
+ log_error("pri dm_snprintf %s failed", attribute);
+ return 0;
+ }
+ if (stat(path, &info) < 0)
+ return 0;
+ }
if (!(fp = fopen(path, "r"))) {
log_sys_error("fopen", path);
@@ -344,6 +383,20 @@ unsigned long dev_alignment_offset(const
sysfs_dir, dev);
}
+unsigned long dev_minimum_io_size(const char *sysfs_dir,
+ struct device *dev)
+{
+ return _dev_topology_attribute("queue/minimum_io_size",
+ sysfs_dir, dev);
+}
+
+unsigned long dev_optimal_io_size(const char *sysfs_dir,
+ struct device *dev)
+{
+ return _dev_topology_attribute("queue/optimal_io_size",
+ sysfs_dir, dev);
+}
+
#else
unsigned long dev_alignment_offset(const char *sysfs_dir,
@@ -352,4 +405,16 @@ unsigned long dev_alignment_offset(const
return 0UL;
}
+unsigned long dev_minimum_io_size(const char *sysfs_dir,
+ struct device *dev)
+{
+ return 0UL;
+}
+
+unsigned long dev_optimal_io_size(const char *sysfs_dir,
+ struct device *dev)
+{
+ return 0UL;
+}
+
#endif
Index: LVM2/lib/device/device.h
===================================================================
--- LVM2.orig/lib/device/device.h
+++ LVM2/lib/device/device.h
@@ -103,4 +103,10 @@ int is_partitioned_dev(struct device *de
unsigned long dev_alignment_offset(const char *sysfs_dir,
struct device *dev);
+unsigned long dev_minimum_io_size(const char *sysfs_dir,
+ struct device *dev);
+
+unsigned long dev_optimal_io_size(const char *sysfs_dir,
+ struct device *dev);
+
#endif
Index: LVM2/lib/metadata/metadata.c
===================================================================
--- LVM2.orig/lib/metadata/metadata.c
+++ LVM2/lib/metadata/metadata.c
@@ -96,6 +96,25 @@ unsigned long set_pe_align(struct physic
dev_md_stripe_width(pv->fmt->cmd->sysfs_dir,
pv->dev));
+ /*
+ * Align to topology's minimum_io_size or optimal_io_size if present
+ * - minimum_io_size - the smallest request the device can perform
+ * w/o incurring a read-modify-write penalty (e.g. MD's chunk size)
+ * - optimal_io_size - the device's preferred unit of receiving I/O
+ * (e.g. MD's stripe width)
+ */
+ if (find_config_tree_bool(pv->fmt->cmd,
+ "devices/data_alignment_detection",
+ DEFAULT_DATA_ALIGNMENT_DETECTION)) {
+ pv->pe_align = MAX(pv->pe_align,
+ dev_minimum_io_size(pv->fmt->cmd->sysfs_dir,
+ pv->dev));
+
+ pv->pe_align = MAX(pv->pe_align,
+ dev_optimal_io_size(pv->fmt->cmd->sysfs_dir,
+ pv->dev));
+ }
+
log_very_verbose("%s: Setting PE alignment to %lu sectors.",
dev_name(pv->dev), pv->pe_align);
Index: LVM2/man/lvm.conf.5.in
===================================================================
--- LVM2.orig/man/lvm.conf.5.in
+++ LVM2/man/lvm.conf.5.in
@@ -137,11 +137,20 @@ has been reused without wiping the md su
directly upon an md device, LVM2 will align its data blocks with the
md device's stripe-width.
.IP
+\fBdata_alignment_detection\fP \(em If set to 1, and your kernel provides
+topology information in sysfs for the Physical Volume, the start of data
+area will be aligned on a multiple of the ‘minimum_io_size’ or
+‘optimal_io_size’ exposed in sysfs. minimum_io_size is used if
+optimal_io_size is undefined (0). If both \fBmd_chunk_alignment\fP and
+\fBdata_alignment_detection\fP are enabled the result of
+\fBdata_alignment_detection\fP is used.
+.IP
\fBdata_alignment\fP \(em Default alignment (in KB) of start of data area
when creating a new Physical Volume using the \fBlvm2\fP format.
If a Physical Volume is placed directly upon an md device and
-\fBmd_chunk_alignment\fP is enabled this parameter is ignored.
-Set to 0 to use the default alignment of 64KB or the page size, if larger.
+\fBmd_chunk_alignment\fP or \fBdata_alignment_detection\fP is enabled
+this parameter is ignored. Set to 0 to use the default alignment of
+64KB or the page size, if larger.
.IP
\fBdata_alignment_offset_detection\fP \(em If set to 1, and your kernel
provides topology information in sysfs for the Physical Volume, the
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 4/6] Improve ability to lookup primary device associated with partition
2009-07-25 22:14 [PATCH 0/6 v7] LVM2 topology support Mike Snitzer
` (2 preceding siblings ...)
2009-07-25 22:14 ` [PATCH 3/6] Add devices/data_alignment_detection " Mike Snitzer
@ 2009-07-25 22:14 ` Mike Snitzer
2009-07-25 22:14 ` [PATCH 5/6] Retrieve MD sysfs attributes for MD partitions Mike Snitzer
2009-07-25 22:14 ` [PATCH 6/6] Add MD, partition and topology tests to the LVM2 test-suite Mike Snitzer
5 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2009-07-25 22:14 UTC (permalink / raw)
To: lvm-devel
Improve lib/device/device.c:_primary_dev()'s ability to look up the
primary device associated with all partitions; including blkext
(e.g. partitions directly on MD). The same will also work for obscure
sysfs paths; e.g.: paths with mangled names like the HP cciss driver
uses: /sys/block/cciss!c0d0/cciss!c0d0p1/
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
lib/device/device.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/lib/device/device.c b/lib/device/device.c
index e627517..93cd4bb 100644
--- a/lib/device/device.c
+++ b/lib/device/device.c
@@ -13,6 +13,7 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <libgen.h> /* dirname, basename */
#include "lib.h"
#include "lvm-types.h"
#include "device.h"
@@ -289,21 +290,71 @@ static int _primary_dev(const char *sysfs_dir,
struct device *dev, dev_t *result)
{
char path[PATH_MAX+1];
+ char temp_path[PATH_MAX+1];
+ char buffer[64];
struct stat info;
+ FILE *fp;
+ int pri_maj, pri_min;
+ int ret = 0;
/* check if dev is a partition */
if (dm_snprintf(path, PATH_MAX, "%s/dev/block/%d:%d/partition",
sysfs_dir, (int)MAJOR(dev->dev), (int)MINOR(dev->dev)) < 0) {
log_error("dm_snprintf partition failed");
- return 0;
+ return ret;
}
if (stat(path, &info) < 0)
- return 0;
+ return ret;
- *result = dev->dev -
- (MINOR(dev->dev) % max_partitions(MAJOR(dev->dev)));
- return 1;
+ /*
+ * extract parent's path from the partition's symlink, e.g.:
+ * - readlink /sys/dev/block/259:0 = ../../block/md0/md0p1
+ * - dirname ../../block/md0/md0p1 = ../../block/md0
+ * - basename ../../block/md0 = md0
+ * Parent's 'dev' sysfs attribute = /sys/block/md0/dev
+ */
+ if (readlink(dirname(path), temp_path, PATH_MAX) < 0) {
+ log_sys_error("readlink", path);
+ return ret;
+ }
+
+ if (dm_snprintf(path, PATH_MAX, "%s/block/%s/dev",
+ sysfs_dir, basename(dirname(temp_path))) < 0) {
+ log_error("dm_snprintf dev failed");
+ return ret;
+ }
+
+ /* finally, parse 'dev' attribute and create corresponding dev_t */
+ if (stat(path, &info) < 0) {
+ log_error("sysfs file %s does not exist", path);
+ return ret;
+ }
+
+ fp = fopen(path, "r");
+ if (!fp) {
+ log_sys_error("fopen", path);
+ return ret;
+ }
+
+ if (!fgets(buffer, sizeof(buffer), fp)) {
+ log_sys_error("fgets", path);
+ goto out;
+ }
+
+ if (sscanf(buffer, "%d:%d", &pri_maj, &pri_min) != 2) {
+ log_error("sysfs file %s not in expected MAJ:MIN format: %s",
+ path, buffer);
+ goto out;
+ }
+ *result = makedev(pri_maj, pri_min);
+ ret = 1;
+
+out:
+ if (fclose(fp))
+ log_sys_error("fclose", path);
+
+ return ret;
}
static unsigned long _dev_topology_attribute(const char *attribute,
--
1.6.2.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 5/6] Retrieve MD sysfs attributes for MD partitions
2009-07-25 22:14 [PATCH 0/6 v7] LVM2 topology support Mike Snitzer
` (3 preceding siblings ...)
2009-07-25 22:14 ` [PATCH 4/6] Improve ability to lookup primary device associated with partition Mike Snitzer
@ 2009-07-25 22:14 ` Mike Snitzer
2009-07-25 22:14 ` [PATCH 6/6] Add MD, partition and topology tests to the LVM2 test-suite Mike Snitzer
5 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2009-07-25 22:14 UTC (permalink / raw)
To: lvm-devel
Rename private _primary_dev() to a public get_primary_dev() and reuse it
to allow retrieval of the MD sysfs attributes (raid level, etc) for MD
partitions.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
lib/device/dev-md.c | 13 +++++++++----
lib/device/device.c | 12 +++++++++---
lib/device/device.h | 3 +++
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c
index 5525b4f..be33775 100644
--- a/lib/device/dev-md.c
+++ b/lib/device/dev-md.c
@@ -131,16 +131,21 @@ static int _md_sysfs_attribute_snprintf(char *path, size_t size,
const char *attribute)
{
struct stat info;
+ dev_t _dev = dev->dev;
int ret = -1;
- if (MAJOR(dev->dev) != md_major())
+ if (!sysfs_dir || !*sysfs_dir)
return ret;
- if (!sysfs_dir || !*sysfs_dir)
+check_md_major:
+ if (MAJOR(_dev) != md_major()) {
+ if (get_primary_dev(sysfs_dir, dev, &_dev))
+ goto check_md_major;
return ret;
+ }
ret = dm_snprintf(path, size, "%s/dev/block/%d:%d/md/%s", sysfs_dir,
- (int)MAJOR(dev->dev), (int)MINOR(dev->dev), attribute);
+ (int)MAJOR(_dev), (int)MINOR(_dev), attribute);
if (ret < 0) {
log_error("dm_snprintf md %s failed", attribute);
return ret;
@@ -149,7 +154,7 @@ static int _md_sysfs_attribute_snprintf(char *path, size_t size,
if (stat(path, &info) < 0) {
/* old sysfs structure */
ret = dm_snprintf(path, size, "%s/block/md%d/md/%s",
- sysfs_dir, (int)MINOR(dev->dev), attribute);
+ sysfs_dir, (int)MINOR(_dev), attribute);
if (ret < 0) {
log_error("dm_snprintf old md %s failed", attribute);
return ret;
diff --git a/lib/device/device.c b/lib/device/device.c
index 93cd4bb..24d5075 100644
--- a/lib/device/device.c
+++ b/lib/device/device.c
@@ -286,8 +286,8 @@ int _get_partition_type(struct dev_mgr *dm, struct device *d)
#ifdef linux
-static int _primary_dev(const char *sysfs_dir,
- struct device *dev, dev_t *result)
+int get_primary_dev(const char *sysfs_dir,
+ struct device *dev, dev_t *result)
{
char path[PATH_MAX+1];
char temp_path[PATH_MAX+1];
@@ -387,7 +387,7 @@ static unsigned long _dev_topology_attribute(const char *attribute,
* or the device could be a partition
*/
if (stat(path, &info) < 0) {
- if (!_primary_dev(sysfs_dir, dev, &primary))
+ if (!get_primary_dev(sysfs_dir, dev, &primary))
return 0;
/* get attribute from partition's primary device */
@@ -450,6 +450,12 @@ unsigned long dev_optimal_io_size(const char *sysfs_dir,
#else
+int get_primary_dev(const char *sysfs_dir,
+ struct device *dev, dev_t *result)
+{
+ return 0;
+}
+
unsigned long dev_alignment_offset(const char *sysfs_dir,
struct device *dev)
{
diff --git a/lib/device/device.h b/lib/device/device.h
index 454fd0b..4ccf9a9 100644
--- a/lib/device/device.h
+++ b/lib/device/device.h
@@ -100,6 +100,9 @@ unsigned long dev_md_stripe_width(const char *sysfs_dir, struct device *dev);
int is_partitioned_dev(struct device *dev);
+int get_primary_dev(const char *sysfs_dir,
+ struct device *dev, dev_t *result);
+
unsigned long dev_alignment_offset(const char *sysfs_dir,
struct device *dev);
--
1.6.2.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 6/6] Add MD, partition and topology tests to the LVM2 test-suite
2009-07-25 22:14 [PATCH 0/6 v7] LVM2 topology support Mike Snitzer
` (4 preceding siblings ...)
2009-07-25 22:14 ` [PATCH 5/6] Retrieve MD sysfs attributes for MD partitions Mike Snitzer
@ 2009-07-25 22:14 ` Mike Snitzer
5 siblings, 0 replies; 8+ messages in thread
From: Mike Snitzer @ 2009-07-25 22:14 UTC (permalink / raw)
To: lvm-devel
Add MD, partition, and topology tests to the LVM2 test-suite.
Added MD devices to the filter in test-utils.sh:prepare_lvmconf()
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
test/t-pvcreate-operation-md.sh | 86 ++++++++++++++++++++++++++++++++++++++++
test/t-pvcreate-usage.sh | 12 +++++
test/test-utils.sh | 2
3 files changed, 99 insertions(+), 1 deletion(-)
create mode 100644 test/t-pvcreate-operation-md.sh
Index: LVM2/test/t-pvcreate-operation-md.sh
===================================================================
--- /dev/null
+++ LVM2/test/t-pvcreate-operation-md.sh
@@ -0,0 +1,86 @@
+# Copyright (C) 2009 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# skip this test if mdadm or sfdisk aren't available
+which mdadm || exit 200
+which sfdisk || exit 200
+
+. ./test-utils.sh
+
+aux prepare_devs 2
+
+# Have MD use a non-standard name to avoid colliding with an existing MD device
+mddev=/dev/md_lvm_test0
+mddev_p=${mddev}p1
+
+cleanup_md() {
+ mdadm --stop $mddev
+ rm -f $mddev
+ teardown
+}
+
+# create 2 disk MD raid0 array (stripe_width=128K)
+[ -b "$mddev" ] && exit 200
+mdadm --create $mddev --auto=md --level 0 --raid-devices=2 --chunk 64 $dev1 $dev2
+trap 'aux cleanup_md' EXIT # cleanup this MD device at the end of the test
+
+# Test alignment of PV on MD without any MD-aware or topology-aware detection
+# - should treat $mddev just like any other block device
+pv_align="192.00K"
+pvcreate --metadatasize 128k \
+ --config 'devices {md_chunk_alignment=0 data_alignment_detection=0 data_alignment_offset_detection=0}' \
+ $mddev
+check_pv_field_ $mddev pe_start $pv_align
+
+# Test md_chunk_alignment independent of topology-aware detection
+pv_align="256.00K"
+pvcreate --metadatasize 128k \
+ --config 'devices {data_alignment_detection=0 data_alignment_offset_detection=0}' \
+ $mddev
+check_pv_field_ $mddev pe_start $pv_align
+
+# Test newer topology-aware alignment detection
+pv_align="256.00K"
+pvcreate --metadatasize 128k \
+ --config 'devices { md_chunk_alignment=0 }' $mddev
+check_pv_field_ $mddev pe_start $pv_align
+
+# partition MD array directly, depends on blkext in Linux >= 2.6.28
+linux_minor=$(echo `uname -r` | cut -d'.' -f3 | cut -d'-' -f1)
+if [ $linux_minor -gt 27 ]; then
+ sfdisk $mddev <<EOF
+,,83
+EOF
+ # make sure partition on MD is _not_ removed
+ # - tests partition -> parent lookup via sysfs paths
+ not pvcreate --metadatasize 128k $mddev
+
+ # verify alignment_offset padding is accounted for in pe_start
+ # - topology infrastructure is available in Linux >= 2.6.31
+ # - also tests partition -> parent lookup via sysfs paths
+ base_mddev=`basename $mddev`
+ base_mddev_p=`basename $mddev_p`
+ sysfs_alignment_offset=/sys/block/${base_mddev}/${base_mddev_p}/alignment_offset
+ [ -f "$sysfs_alignment_offset" ] && \
+ alignment_offset=`cat $sysfs_alignment_offset` || \
+ alignment_offset=0
+
+ if [ "$alignment_offset" = "512" ]; then
+ pv_align="256.50K"
+ pvcreate --metadatasize 128k $mddev_p
+ check_pv_field_ $mddev_p pe_start $pv_align
+ pvremove $mddev_p
+ elif [ "$alignment_offset" = "2048" ]; then
+ pv_align="258.00K"
+ pvcreate --metadatasize 128k $mddev_p
+ check_pv_field_ $mddev_p pe_start $pv_align
+ pvremove $mddev_p
+ fi
+fi
Index: LVM2/test/t-pvcreate-usage.sh
===================================================================
--- LVM2.orig/test/t-pvcreate-usage.sh
+++ LVM2/test/t-pvcreate-usage.sh
@@ -116,6 +116,18 @@ check_pv_field_ $dev1 pe_start $pv_align
pvcreate --metadatasize 128k --metadatacopies 2 --dataalignment 3.5k $dev1
check_pv_field_ $dev1 pe_start $pv_align
+# data area is aligned to 64k by default,
+# data area has additional padding before its start
+pv_align="195.50K"
+pvcreate --metadatasize 128k --dataalignmentoffset 7s $dev1
+check_pv_field_ $dev1 pe_start $pv_align
+
+# 2nd metadata area is created without problems when
+# additional data area padding is used
+pvcreate --metadatasize 128k --metadatacopies 2 --dataalignmentoffset 7s $dev1
+check_pv_field_ $dev1 pv_mda_count 2
+# FIXME: compare start of 2nd mda with and without --dataalignmentoffset
+
#COMM 'pv with LVM1 compatible data alignment can be convereted'
#compatible == LVM1_PE_ALIGN == 64k
pvcreate --dataalignment 256k $dev1
Index: LVM2/test/test-utils.sh
===================================================================
--- LVM2.orig/test/test-utils.sh
+++ LVM2/test/test-utils.sh
@@ -182,7 +182,7 @@ prepare_lvmconf() {
devices {
dir = "$G_dev_"
scan = "$G_dev_"
- filter = [ "a/dev\/mirror/", "a/dev\/mapper\/.*pv[0-9_]*$/", "r/.*/" ]
+ filter = [ "a|/dev/md.*|", "a/dev\/mirror/", "a/dev\/mapper\/.*pv[0-9_]*$/", "r/.*/" ]
cache_dir = "$G_root_/etc"
sysfs_scan = 0
}
^ permalink raw reply [flat|nested] 8+ messages in thread