From: Jonathan Brassow <jbrassow@redhat.com>
To: lvm-devel@redhat.com
Subject: [PATCH] DM RAID: Add ability to throttle sync operations for RAID LVs.
Date: Thu, 30 May 2013 16:15:07 -0500 [thread overview]
Message-ID: <1369948507.8964.3.camel@f16> (raw)
Please take a look at the following patch that adds throttling for RAID
sync operations. Any feedback on what the 'lvs' header should look like
when reporting the min/max recovery rate would be helpful also. (Right
now it is MinSync and MaxSync.)
brassow
This patch adds the ability to set the minimum and maximum I/O rate for
sync operations in RAID LVs. The options are available for 'lvcreate' and
'lvchange' and are as follows:
--minrecoveryrate <Rate>
--maxrecoveryrate <Rate>
The rate is specified in kiB/sec/disk. Setting the rate to 0 removes the
preference.
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Index: lvm2/lib/metadata/metadata-exported.h
===================================================================
--- lvm2.orig/lib/metadata/metadata-exported.h
+++ lvm2/lib/metadata/metadata-exported.h
@@ -337,6 +337,8 @@ struct lv_segment {
/* FIXME Fields depend on segment type */
uint32_t stripe_size; /* For stripe and RAID - in sectors */
uint32_t writebehind; /* For RAID (RAID1 only) */
+ uint32_t min_recovery_rate; /* For RAID */
+ uint32_t max_recovery_rate; /* For RAID */
uint32_t area_count;
uint32_t area_len;
uint32_t chunk_size; /* For snapshots/thin_pool. In sectors. */
@@ -631,6 +633,9 @@ struct lvcreate_params {
uint32_t mirrors; /* mirror */
+ uint32_t min_recovery_rate; /* RAID */
+ uint32_t max_recovery_rate; /* RAID */
+
const struct segment_type *segtype; /* all */
unsigned target_attr; /* all */
Index: lvm2/lib/raid/raid.c
===================================================================
--- lvm2.orig/lib/raid/raid.c
+++ lvm2/lib/raid/raid.c
@@ -129,6 +129,24 @@ static int _raid_text_import(struct lv_s
return 0;
}
}
+ if (dm_config_has_node(sn, "min_recovery_rate")) {
+ if (!dm_config_get_uint32(sn, "min_recovery_rate",
+ &seg->min_recovery_rate)) {
+ log_error("Couldn't read 'min_recovery_rate' for "
+ "segment %s of logical volume %s.",
+ dm_config_parent_name(sn), seg->lv->name);
+ return 0;
+ }
+ }
+ if (dm_config_has_node(sn, "max_recovery_rate")) {
+ if (!dm_config_get_uint32(sn, "max_recovery_rate",
+ &seg->max_recovery_rate)) {
+ log_error("Couldn't read 'max_recovery_rate' for "
+ "segment %s of logical volume %s.",
+ dm_config_parent_name(sn), seg->lv->name);
+ return 0;
+ }
+ }
if (!dm_config_get_list(sn, "raids", &cv)) {
log_error("Couldn't find RAID array for "
"segment %s of logical volume %s.",
@@ -155,6 +173,10 @@ static int _raid_text_export(const struc
outf(f, "stripe_size = %" PRIu32, seg->stripe_size);
if (seg->writebehind)
outf(f, "writebehind = %" PRIu32, seg->writebehind);
+ if (seg->min_recovery_rate)
+ outf(f, "min_recovery_rate = %" PRIu32, seg->min_recovery_rate);
+ if (seg->max_recovery_rate)
+ outf(f, "max_recovery_rate = %" PRIu32, seg->max_recovery_rate);
return out_areas(f, seg, "raid");
}
@@ -227,6 +249,8 @@ static int _raid_add_target_line(struct
params.stripe_size = seg->stripe_size;
params.rebuilds = rebuilds;
params.writemostly = writemostly;
+ params.min_recovery_rate = seg->min_recovery_rate;
+ params.max_recovery_rate = seg->max_recovery_rate;
params.flags = flags;
if (!dm_tree_node_add_raid_target_with_params(node, len, &params))
Index: lvm2/lib/report/columns.h
===================================================================
--- lvm2.orig/lib/report/columns.h
+++ lvm2/lib/report/columns.h
@@ -83,6 +83,8 @@ FIELD(LVS, lv, NUM, "Cpy%Sync", lvid, 8,
FIELD(LVS, lv, NUM, "Mismatches", lvid, 10, mismatch_count, mismatches, "For RAID, number of mismatches found or repaired.", 0)
FIELD(LVS, lv, STR, "SyncAction", lvid, 10, sync_action, syncaction, "For RAID, the current synchronization action being performed.", 0)
FIELD(LVS, lv, NUM, "WBehind", lvid, 7, write_behind, writebehind, "For RAID1, the number of outstanding writes allowed to writemostly devices.", 0)
+FIELD(LVS, lv, NUM, "MinSync", lvid, 7, min_recovery_rate, minrecoveryrate, "For RAID, the minimum recovery I/O load in kiB/sec/disk.", 0)
+FIELD(LVS, lv, NUM, "MaxSync", lvid, 7, max_recovery_rate, maxrecoveryrate, "For RAID, the maximum recovery I/O load in kiB/sec/disk.", 0)
FIELD(LVS, lv, STR, "Move", lvid, 4, movepv, move_pv, "For pvmove, Source PV of temporary LV created by pvmove.", 0)
FIELD(LVS, lv, STR, "Convert", lvid, 7, convertlv, convert_lv, "For lvconvert, Name of temporary LV created by lvconvert.", 0)
FIELD(LVS, lv, STR, "Log", lvid, 3, loglv, mirror_log, "For mirrors, the LV holding the synchronisation log.", 0)
Index: lvm2/lib/report/properties.c
===================================================================
--- lvm2.orig/lib/report/properties.c
+++ lvm2/lib/report/properties.c
@@ -113,6 +113,14 @@ static uint32_t _writebehind(const struc
return first_seg(lv)->writebehind;
}
+/* Return the min_recovery_rate stored in the first segment of lv. */
+static uint32_t _minrecoveryrate(const struct logical_volume *lv) {
+	const struct lv_segment *seg = first_seg(lv);
+
+	return seg->min_recovery_rate;
+}
+
+/* Return the max_recovery_rate stored in the first segment of lv. */
+static uint32_t _maxrecoveryrate(const struct logical_volume *lv) {
+	const struct lv_segment *seg = first_seg(lv);
+
+	return seg->max_recovery_rate;
+}
+
static percent_t _snap_percent(const struct logical_volume *lv) {
percent_t perc;
@@ -219,6 +227,10 @@ GET_LV_NUM_PROPERTY_FN(mismatches, _mism
#define _mismatches_set _not_implemented_set
GET_LV_NUM_PROPERTY_FN(writebehind, _writebehind(lv))
#define _writebehind_set _not_implemented_set
+GET_LV_NUM_PROPERTY_FN(minrecoveryrate, _minrecoveryrate(lv))
+#define _minrecoveryrate_set _not_implemented_set
+GET_LV_NUM_PROPERTY_FN(maxrecoveryrate, _maxrecoveryrate(lv))
+#define _maxrecoveryrate_set _not_implemented_set
GET_LV_STR_PROPERTY_FN(syncaction, _sync_action(lv))
#define _syncaction_set _not_implemented_set
GET_LV_STR_PROPERTY_FN(move_pv, lv_move_pv_dup(lv->vg->vgmem, lv))
Index: lvm2/lib/report/report.c
===================================================================
--- lvm2.orig/lib/report/report.c
+++ lvm2/lib/report/report.c
@@ -1002,6 +1002,40 @@ static int _write_behind_disp(struct dm_
return dm_report_field_uint32(rh, field, &first_seg(lv)->writebehind);
}
+/*
+ * Report handler for the minimum recovery rate column.
+ *
+ * data points at the logical volume being reported.  The field is set
+ * to the empty string when the LV is not of a RAID type or when its
+ * first segment has a min_recovery_rate of 0; otherwise the stored
+ * value is reported as a uint32.
+ */
+static int _min_recovery_rate_disp(struct dm_report *rh __attribute__((unused)),
+				   struct dm_pool *mem,
+				   struct dm_report_field *field,
+				   const void *data,
+				   void *private __attribute__((unused)))
+{
+	const struct logical_volume *lv = data;
+
+	/* first_seg() is only consulted once the LV is known to be RAID. */
+	if (lv_is_raid_type(lv) && first_seg(lv)->min_recovery_rate)
+		return dm_report_field_uint32(rh, field,
+					      &first_seg(lv)->min_recovery_rate);
+
+	dm_report_field_set_value(field, "", NULL);
+	return 1;
+}
+
+/*
+ * Report handler for the maximum recovery rate column.
+ *
+ * data points at the logical volume being reported.  The field is set
+ * to the empty string when the LV is not of a RAID type or when its
+ * first segment has a max_recovery_rate of 0; otherwise the stored
+ * value is reported as a uint32.
+ */
+static int _max_recovery_rate_disp(struct dm_report *rh __attribute__((unused)),
+				   struct dm_pool *mem,
+				   struct dm_report_field *field,
+				   const void *data,
+				   void *private __attribute__((unused)))
+{
+	const struct logical_volume *lv = data;
+
+	/* first_seg() is only consulted once the LV is known to be RAID. */
+	if (lv_is_raid_type(lv) && first_seg(lv)->max_recovery_rate)
+		return dm_report_field_uint32(rh, field,
+					      &first_seg(lv)->max_recovery_rate);
+
+	dm_report_field_set_value(field, "", NULL);
+	return 1;
+}
+
static int _dtpercent_disp(int metadata, struct dm_report *rh,
struct dm_pool *mem,
struct dm_report_field *field,
Index: lvm2/libdm/libdm-deptree.c
===================================================================
--- lvm2.orig/libdm/libdm-deptree.c
+++ lvm2/libdm/libdm-deptree.c
@@ -183,9 +183,11 @@ struct load_segment {
struct dm_tree_node *replicator;/* Replicator-dev */
uint64_t rdevice_index; /* Replicator-dev */
- uint64_t rebuilds; /* raid */
- uint64_t writemostly; /* raid */
- uint32_t writebehind; /* raid */
+ uint64_t rebuilds; /* raid */
+ uint64_t writemostly; /* raid */
+ uint32_t writebehind; /* raid */
+ uint32_t max_recovery_rate; /* raid kB/sec/disk */
+ uint32_t min_recovery_rate; /* raid kB/sec/disk */
struct dm_tree_node *metadata; /* Thin_pool */
struct dm_tree_node *pool; /* Thin_pool, Thin */
@@ -2133,6 +2135,12 @@ static int _raid_emit_segment_line(struc
if (seg->writebehind)
param_count += 2;
+ if (seg->min_recovery_rate)
+ param_count += 2;
+
+ if (seg->max_recovery_rate)
+ param_count += 2;
+
/* rebuilds is 64-bit */
param_count += 2 * hweight32(seg->rebuilds & 0xFFFFFFFF);
param_count += 2 * hweight32(seg->rebuilds >> 32);
@@ -2166,6 +2174,14 @@ static int _raid_emit_segment_line(struc
if (seg->writebehind)
EMIT_PARAMS(pos, " writebehind %u", seg->writebehind);
+ if (seg->min_recovery_rate)
+ EMIT_PARAMS(pos, " min_recovery_rate %u",
+ seg->min_recovery_rate);
+
+ if (seg->max_recovery_rate)
+ EMIT_PARAMS(pos, " max_recovery_rate %u",
+ seg->max_recovery_rate);
+
/* Print number of metadata/data device pairs */
EMIT_PARAMS(pos, " %u", seg->area_count/2);
@@ -2901,6 +2917,8 @@ int dm_tree_node_add_raid_target_with_pa
seg->rebuilds = p->rebuilds;
seg->writemostly = p->writemostly;
seg->writebehind = p->writebehind;
+ seg->min_recovery_rate = p->min_recovery_rate;
+ seg->max_recovery_rate = p->max_recovery_rate;
seg->flags = p->flags;
return 1;
Index: lvm2/lib/metadata/lv_manip.c
===================================================================
--- lvm2.orig/lib/metadata/lv_manip.c
+++ lvm2/lib/metadata/lv_manip.c
@@ -4714,6 +4714,9 @@ static struct logical_volume *_lv_create
stack;
goto revert_new_lv;
}
+ } else if (seg_is_raid(lp)) {
+ first_seg(lv)->min_recovery_rate = lp->min_recovery_rate;
+ first_seg(lv)->max_recovery_rate = lp->max_recovery_rate;
}
/* FIXME Log allocation and attachment should have happened inside lv_extend. */
Index: lvm2/man/lvchange.8.in
===================================================================
--- lvm2.orig/man/lvchange.8.in
+++ lvm2/man/lvchange.8.in
@@ -26,6 +26,8 @@ lvchange \- change attributes of a logic
.RI { y | n }]
.RB [ \-\-poll
.RI { y | n }]
+.RB [ \-\-maxrecoveryrate " " \fIRate\fP ]
+.RB [ \-\-minrecoveryrate " " \fIRate\fP ]
.RB [ \-\-syncaction
.RI { check | repair }]
.RB [ \-\-sysinit ]
@@ -111,6 +113,16 @@ process from its last checkpoint. Howev
immediately poll a logical volume when it is activated, use
\fB\-\-poll n\fP to defer and then \fB\-\-poll y\fP to restart the process.
.TP
+.IR \fB\-\-maxrecoveryrate " " \fIRate
+Sets the maximum recovery rate for a RAID logical volume. \fIRate\fP
+is specified in kiB/sec/device. Setting the recovery rate to 0 means
+it will be unbounded.
+.TP
+.IR \fB\-\-minrecoveryrate " " \fIRate
+Sets the minimum recovery rate for a RAID logical volume. \fIRate\fP
+is specified in kiB/sec/device. Setting the recovery rate to 0 means
+it will be unbounded.
+.TP
.BR \-\-syncaction " {" \fIcheck | \fIrepair }
This argument is used to initiate various RAID synchronization operations.
The \fIcheck\fP and \fIrepair\fP options provide a way to check the
Index: lvm2/man/lvcreate.8.in
===================================================================
--- lvm2.orig/man/lvcreate.8.in
+++ lvm2/man/lvcreate.8.in
@@ -19,6 +19,8 @@ lvcreate \- create a logical volume in a
.RB [ \-\-ignoremonitoring ]
.RB [ \-\-monitor
.RI { y | n }]
+.RB [ \-\-maxrecoveryrate " " \fIRate\fP ]
+.RB [ \-\-minrecoveryrate " " \fIRate\fP ]
.RB [ \-i | \-\-stripes
.IR Stripes
.RB [ \-I | \-\-stripesize
@@ -243,6 +245,16 @@ Sets the name for the new logical volume
Without this option a default name of "lvol#" will be generated where
# is the LVM internal number of the logical volume.
.TP
+.IR \fB\-\-maxrecoveryrate " " \fIRate
+Sets the maximum recovery rate for a RAID logical volume. \fIRate\fP
+is specified in kiB/sec/device. Setting the recovery rate to 0 means
+it will be unbounded.
+.TP
+.IR \fB\-\-minrecoveryrate " " \fIRate
+Sets the minimum recovery rate for a RAID logical volume. \fIRate\fP
+is specified in kiB/sec/device. Setting the recovery rate to 0 means
+it will be unbounded.
+.TP
.B \-\-noudevsync
Disables udev synchronisation. The
process will not wait for notification from udev.
Index: lvm2/tools/args.h
===================================================================
--- lvm2.orig/tools/args.h
+++ lvm2/tools/args.h
@@ -89,6 +89,8 @@ arg(validate_ARG, '\0', "validate", NULL
arg(syncaction_ARG, '\0', "syncaction", string_arg, 0)
arg(writemostly_ARG, '\0', "writemostly", string_arg, ARG_GROUPABLE)
arg(writebehind_ARG, '\0', "writebehind", int_arg, 0)
+arg(minrecoveryrate_ARG, '\0', "minrecoveryrate", int_arg, 0)
+arg(maxrecoveryrate_ARG, '\0', "maxrecoveryrate", int_arg, 0)
/* Allow some variations */
arg(resizable_ARG, '\0', "resizable", yes_no_arg, 0)
Index: lvm2/tools/commands.h
===================================================================
--- lvm2.orig/tools/commands.h
+++ lvm2/tools/commands.h
@@ -84,6 +84,8 @@ xx(lvchange,
"\t[--monitor {y|n}]\n"
"\t[--poll {y|n}]\n"
"\t[--noudevsync]\n"
+ "\t[--minrecoveryrate Rate]\n"
+ "\t[--maxrecoveryrate Rate]\n"
"\t[-M|--persistent y|n] [--major major] [--minor minor]\n"
"\t[-P|--partial] " "\n"
"\t[-p|--permission r|rw]\n"
@@ -95,7 +97,7 @@ xx(lvchange,
"\t[-t|--test]\n"
"\t[-v|--verbose]\n"
"\t[--version]\n"
- "\t[--writebehind BehindCount\n"
+ "\t[--writebehind BehindCount]\n"
"\t[--writemostly PhysicalVolume]\n"
"\t[-y|--yes]\n"
"\t[-Z|--zero {y|n}]\n"
@@ -103,7 +105,8 @@ xx(lvchange,
alloc_ARG, autobackup_ARG, activate_ARG, available_ARG, contiguous_ARG,
discards_ARG, force_ARG, ignorelockingfailure_ARG, ignoremonitoring_ARG,
- major_ARG, minor_ARG, monitor_ARG, noudevsync_ARG, partial_ARG,
+ major_ARG, minor_ARG, monitor_ARG, minrecoveryrate_ARG, maxrecoveryrate_ARG,
+ noudevsync_ARG, partial_ARG,
permission_ARG, persistent_ARG, poll_ARG, readahead_ARG, resync_ARG,
refresh_ARG, addtag_ARG, deltag_ARG, syncaction_ARG, sysinit_ARG, test_ARG,
writebehind_ARG, writemostly_ARG, zero_ARG)
@@ -194,6 +197,8 @@ xx(lvcreate,
"\t -L|--size LogicalVolumeSize[bBsSkKmMgGtTpPeE]}\n"
"\t[-M|--persistent {y|n}] [--major major] [--minor minor]\n"
"\t[-m|--mirrors Mirrors [--nosync] [{--mirrorlog {disk|core|mirrored}|--corelog}]]\n"
+ "\t[--minrecoveryrate Rate]\n"
+ "\t[--maxrecoveryrate Rate]\n"
"\t[-n|--name LogicalVolumeName]\n"
"\t[--noudevsync]\n"
"\t[-p|--permission {r|rw}]\n"
@@ -244,7 +249,8 @@ xx(lvcreate,
addtag_ARG, alloc_ARG, autobackup_ARG, activate_ARG, available_ARG,
chunksize_ARG, contiguous_ARG, corelog_ARG, discards_ARG, extents_ARG,
ignoremonitoring_ARG, major_ARG, minor_ARG, mirrorlog_ARG, mirrors_ARG,
- monitor_ARG, name_ARG, nosync_ARG, noudevsync_ARG, permission_ARG,
+ monitor_ARG, minrecoveryrate_ARG, maxrecoveryrate_ARG, name_ARG, nosync_ARG,
+ noudevsync_ARG, permission_ARG,
persistent_ARG, readahead_ARG, regionsize_ARG, size_ARG, snapshot_ARG,
stripes_ARG, stripesize_ARG, test_ARG, thin_ARG, thinpool_ARG, type_ARG,
virtualoriginsize_ARG, poolmetadatasize_ARG, virtualsize_ARG, zero_ARG)
Index: lvm2/tools/lvchange.c
===================================================================
--- lvm2.orig/tools/lvchange.c
+++ lvm2/tools/lvchange.c
@@ -810,6 +810,55 @@ static int lvchange_writemostly(struct l
return 1;
}
+/*
+ * Change the sync throttling limits of a RAID logical volume.
+ *
+ * Takes the values of --minrecoveryrate and/or --maxrecoveryrate from the
+ * command line (an option that was not given leaves the stored value
+ * unchanged), records them in the LV's first segment, then writes and
+ * commits the metadata around a suspend/resume of the LV.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int lvchange_recovery_rate(struct logical_volume *lv)
+{
+	struct cmd_context *cmd = lv->vg->cmd;
+	struct lv_segment *raid_seg = first_seg(lv);
+	uint32_t min_rate = raid_seg->min_recovery_rate;
+	uint32_t max_rate = raid_seg->max_recovery_rate;
+
+	/* Any RAID segment type qualifies, not only "raid1". */
+	if (!seg_is_raid(raid_seg)) {
+		log_error("Unable to change the recovery rate of non-RAID"
+			  " logical volume.");
+		return 0;
+	}
+
+	if (arg_count(cmd, minrecoveryrate_ARG))
+		min_rate = arg_uint_value(cmd, minrecoveryrate_ARG, 0);
+	if (arg_count(cmd, maxrecoveryrate_ARG))
+		max_rate = arg_uint_value(cmd, maxrecoveryrate_ARG, 0);
+
+	/*
+	 * Validate before touching the segment so that a rejected request
+	 * leaves the in-memory metadata as it was.  A maximum of 0 is
+	 * exempt from the comparison.
+	 */
+	if (max_rate && (max_rate < min_rate)) {
+		log_error("Minimum recovery rate cannot"
+			  " be set higher than maximum.");
+		return 0;
+	}
+
+	raid_seg->min_recovery_rate = min_rate;
+	raid_seg->max_recovery_rate = max_rate;
+
+	if (!vg_write(lv->vg))
+		return_0;
+
+	if (!suspend_lv(cmd, lv)) {
+		vg_revert(lv->vg);
+		return_0;
+	}
+
+	if (!vg_commit(lv->vg)) {
+		/* Do not leave the LV suspended when the commit fails. */
+		if (!resume_lv(cmd, lv))
+			stack;
+		return_0;
+	}
+
+	log_very_verbose("Updating recovery rate for \"%s\" in kernel",
+			 lv->name);
+	if (!resume_lv(cmd, lv)) {
+		log_error("Problem reactivating %s", lv->name);
+		return 0;
+	}
+
+	return 1;
+}
+
static int lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
void *handle __attribute__((unused)))
{
@@ -992,6 +1041,18 @@ static int lvchange_single(struct cmd_co
docmds++;
}
+ /* change [min|max]_recovery_rate */
+ if (arg_count(cmd, minrecoveryrate_ARG) ||
+ arg_count(cmd, maxrecoveryrate_ARG)) {
+ if (!archived && !archive(lv->vg)) {
+ stack;
+ return ECMD_FAILED;
+ }
+ archived = 1;
+ doit += lvchange_recovery_rate(lv);
+ docmds++;
+ }
+
if (doit)
log_print_unless_silent("Logical volume \"%s\" changed", lv->name);
Index: lvm2/tools/lvcreate.c
===================================================================
--- lvm2.orig/tools/lvcreate.c
+++ lvm2/tools/lvcreate.c
@@ -558,6 +558,18 @@ static int _read_raid_params(struct lvcr
return 0;
}
+	/* Optional sync throttling; a field is only assigned when its option was given. */
+	if (arg_count(cmd, minrecoveryrate_ARG))
+		lp->min_recovery_rate = arg_uint_value(cmd,
+						       minrecoveryrate_ARG, 0);
+	if (arg_count(cmd, maxrecoveryrate_ARG))
+		lp->max_recovery_rate = arg_uint_value(cmd,
+						       maxrecoveryrate_ARG, 0);
+
+	/* A maximum of 0 is exempt from the comparison. */
+	if (lp->max_recovery_rate &&
+	    (lp->max_recovery_rate < lp->min_recovery_rate)) {
+		log_error("Minimum recovery rate cannot be higher than maximum.");
+		return 0;
+	}
return 1;
}
next reply other threads:[~2013-05-30 21:15 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-30 21:15 Jonathan Brassow [this message]
2013-05-31 7:36 ` [PATCH] DM RAID: Add ability to throttle sync operations for RAID LVs Zdenek Kabelac
2013-05-31 14:36 ` Brassow Jonathan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1369948507.8964.3.camel@f16 \
--to=jbrassow@redhat.com \
--cc=lvm-devel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.