* [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support
@ 2016-03-15 4:38 Bharata B Rao
2016-03-15 4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
` (2 more replies)
0 siblings, 3 replies; 33+ messages in thread
From: Bharata B Rao @ 2016-03-15 4:38 UTC (permalink / raw)
To: qemu-devel; +Cc: thuth, Bharata B Rao, mdroth, qemu-ppc, nfont, imammedo, david
This patchset adds memory hot removal support for PowerPC sPAPR.
This new version switches to the proposed "count-indexed" type of
hotplug identifier, which allows hot-removing a number of LMBs starting
from a given DRC index.
This count-indexed hotplug identifier isn't yet part of PAPR.
Changes in v2
-------------
- Use count-indexed hotplug identifier type for LMB removal.
v1: https://lists.gnu.org/archive/html/qemu-ppc/2015-10/msg00163.html
Bharata B Rao (2):
spapr: Add DRC count indexed hotplug identifier type
spapr: Memory hot-unplug support
hw/ppc/spapr.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++-
hw/ppc/spapr_drc.c | 18 ++++++++++
hw/ppc/spapr_events.c | 57 +++++++++++++++++++++++--------
include/hw/ppc/spapr.h | 2 ++
4 files changed, 155 insertions(+), 15 deletions(-)
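
For reference, the intended usage is the standard pc-dimm flow. Roughly, from
the HMP monitor (IDs and sizes below are only illustrative, and hot removal
assumes a guest whose drmgr understands the new identifier type):

  (qemu) object_add memory-backend-ram,id=mem1,size=1G
  (qemu) device_add pc-dimm,id=dimm1,memdev=mem1
  ...
  (qemu) device_del dimm1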
--
2.1.0
^ permalink raw reply [flat|nested] 33+ messages in thread* [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type 2016-03-15 4:38 [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support Bharata B Rao @ 2016-03-15 4:38 ` Bharata B Rao 2016-03-16 1:29 ` David Gibson 2016-03-17 16:03 ` Michael Roth 2016-03-15 4:38 ` [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support Bharata B Rao 2016-05-27 15:48 ` [Qemu-devel] [RFC PATCH v2 0/2] " Thomas Huth 2 siblings, 2 replies; 33+ messages in thread From: Bharata B Rao @ 2016-03-15 4:38 UTC (permalink / raw) To: qemu-devel; +Cc: thuth, Bharata B Rao, mdroth, qemu-ppc, nfont, imammedo, david Add support for DRC count indexed hotplug ID type which is primarily needed for memory hot unplug. This type allows for specifying the number of DRs that should be plugged/unplugged starting from a given DRC index. NOTE: This new hotplug identifier type is not yet part of PAPR. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- hw/ppc/spapr_events.c | 57 +++++++++++++++++++++++++++++++++++++------------- include/hw/ppc/spapr.h | 2 ++ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 39f4682..5d1d13d 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -171,6 +171,16 @@ struct epow_log_full { struct rtas_event_log_v6_epow epow; } QEMU_PACKED; +union drc_id { + uint32_t index; + uint32_t count; + struct count_index { + uint32_t index; + uint32_t count; + } count_index; + char name[1]; +} QEMU_PACKED; + struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ struct rtas_event_log_v6_section_header hdr; @@ -187,12 +197,9 @@ struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_HP_ID_DRC_NAME 1 #define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 #define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 uint8_t reserved; - union { - uint32_t index; - uint32_t count; - char name[1]; - } drc; + union drc_id drc_id; } QEMU_PACKED; struct hp_log_full { @@ -389,7 +396,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, sPAPRDRConnectorType drc_type, - uint32_t drc) + union drc_id *drc_id) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct hp_log_full *new_hp; @@ -446,9 +453,12 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, } if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { - hp->drc.count = cpu_to_be32(drc); + hp->drc_id.count = cpu_to_be32(drc_id->count); } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { - hp->drc.index = cpu_to_be32(drc); + hp->drc_id.index = cpu_to_be32(drc_id->index); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) { + hp->drc_id.count_index.count = cpu_to_be32(drc_id->count_index.count); + hp->drc_id.count_index.index = cpu_to_be32(drc_id->count_index.index); } rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); @@ -460,34 +470,53 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); sPAPRDRConnectorType drc_type = drck->get_type(drc); - uint32_t index = drck->get_index(drc); + union drc_id drc_id; + drc_id.index = drck->get_index(drc); spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index); + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); } void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc) { 
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); sPAPRDRConnectorType drc_type = drck->get_type(drc); - uint32_t index = drck->get_index(drc); + union drc_id drc_id; + drc_id.index = drck->get_index(drc); spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index); + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, uint32_t count) { + union drc_id drc_id; + drc_id.count = count; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count); + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); } void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, uint32_t count) { + union drc_id drc_id; + drc_id.count = count; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count); + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index) +{ + union drc_id drc_id; + drc_id.count_index.count = count; + drc_id.count_index.index = index; + + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 098d85d..f0c426b 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -585,6 +585,8 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, uint32_t count); void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, uint32_t count); +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index); /* rtas-configure-connector state */ struct sPAPRConfigureConnectorState { -- 2.1.0 ^ permalink raw reply related [flat|nested] 33+ messages in thread
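As a usage sketch (not part of this patch; the count and the start_index
variable below are made up for illustration), a caller unplugging a DIMM
backed by several LMBs would signal the guest with something like:

    /* Illustrative only: ask the guest to release 4 LMBs, starting from
     * the DRC index of the DIMM's first LMB. */
    spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
                                              4, start_index);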
* Re: [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type 2016-03-15 4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao @ 2016-03-16 1:29 ` David Gibson 2016-03-17 16:03 ` Michael Roth 1 sibling, 0 replies; 33+ messages in thread From: David Gibson @ 2016-03-16 1:29 UTC (permalink / raw) To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo [-- Attachment #1: Type: text/plain, Size: 6568 bytes --] On Tue, Mar 15, 2016 at 10:08:55AM +0530, Bharata B Rao wrote: > Add support for DRC count indexed hotplug ID type which is primarily > needed for memory hot unplug. This type allows for specifying the > number of DRs that should be plugged/unplugged starting from a given > DRC index. > > NOTE: This new hotplug identifier type is not yet part of PAPR. > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Looks correct, but obviously I won't apply until the change reaches PAPR. > --- > hw/ppc/spapr_events.c | 57 +++++++++++++++++++++++++++++++++++++------------- > include/hw/ppc/spapr.h | 2 ++ > 2 files changed, 45 insertions(+), 14 deletions(-) > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 39f4682..5d1d13d 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -171,6 +171,16 @@ struct epow_log_full { > struct rtas_event_log_v6_epow epow; > } QEMU_PACKED; > > +union drc_id { > + uint32_t index; > + uint32_t count; > + struct count_index { > + uint32_t index; > + uint32_t count; > + } count_index; > + char name[1]; > +} QEMU_PACKED; > + > struct rtas_event_log_v6_hp { > #define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ > struct rtas_event_log_v6_section_header hdr; > @@ -187,12 +197,9 @@ struct rtas_event_log_v6_hp { > #define RTAS_LOG_V6_HP_ID_DRC_NAME 1 > #define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 > #define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 > +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 > uint8_t reserved; > - union { > - uint32_t index; > - uint32_t count; > - char name[1]; > - } drc; > + union drc_id drc_id; > } QEMU_PACKED; > > struct hp_log_full { > @@ -389,7 +396,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) > > static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, > sPAPRDRConnectorType drc_type, > - uint32_t drc) > + union drc_id *drc_id) > { > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > struct hp_log_full *new_hp; > @@ -446,9 +453,12 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, > } > > if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { > - hp->drc.count = cpu_to_be32(drc); > + hp->drc_id.count = cpu_to_be32(drc_id->count); > } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { > - hp->drc.index = cpu_to_be32(drc); > + hp->drc_id.index = cpu_to_be32(drc_id->index); > + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) { > + hp->drc_id.count_index.count = cpu_to_be32(drc_id->count_index.count); > + hp->drc_id.count_index.index = cpu_to_be32(drc_id->count_index.index); > } > > rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); > @@ -460,34 +470,53 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) > { > sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > sPAPRDRConnectorType drc_type = drck->get_type(drc); > - uint32_t index = drck->get_index(drc); > + union drc_id drc_id; > + drc_id.index = drck->get_index(drc); > > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, 
> - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index); > + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); > } > > void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc) > { > sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > sPAPRDRConnectorType drc_type = drck->get_type(drc); > - uint32_t index = drck->get_index(drc); > + union drc_id drc_id; > + drc_id.index = drck->get_index(drc); > > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, > - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index); > + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > } > > void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, > uint32_t count) > { > + union drc_id drc_id; > + drc_id.count = count; > + > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, > - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count); > + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); > } > > void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, > uint32_t count) > { > + union drc_id drc_id; > + drc_id.count = count; > + > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, > - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count); > + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > +} > + > +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, > + uint32_t count, uint32_t index) > +{ > + union drc_id drc_id; > + drc_id.count_index.count = count; > + drc_id.count_index.index = index; > + > + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, > + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > } > > static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 098d85d..f0c426b 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -585,6 +585,8 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, > uint32_t count); > void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, > uint32_t count); > +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, > + uint32_t count, uint32_t index); > > /* rtas-configure-connector state */ > struct sPAPRConfigureConnectorState { -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 819 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type 2016-03-15 4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao 2016-03-16 1:29 ` David Gibson @ 2016-03-17 16:03 ` Michael Roth 1 sibling, 0 replies; 33+ messages in thread From: Michael Roth @ 2016-03-17 16:03 UTC (permalink / raw) To: Bharata B Rao, qemu-devel; +Cc: thuth, qemu-ppc, imammedo, nfont, david Quoting Bharata B Rao (2016-03-14 23:38:55) > Add support for DRC count indexed hotplug ID type which is primarily > needed for memory hot unplug. This type allows for specifying the > number of DRs that should be plugged/unplugged starting from a given > DRC index. > > NOTE: This new hotplug identifier type is not yet part of PAPR. > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > --- > hw/ppc/spapr_events.c | 57 +++++++++++++++++++++++++++++++++++++------------- > include/hw/ppc/spapr.h | 2 ++ > 2 files changed, 45 insertions(+), 14 deletions(-) > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index 39f4682..5d1d13d 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -171,6 +171,16 @@ struct epow_log_full { > struct rtas_event_log_v6_epow epow; > } QEMU_PACKED; > > +union drc_id { > + uint32_t index; > + uint32_t count; > + struct count_index { > + uint32_t index; > + uint32_t count; The current version of the spec proposal is actually count followed by index. I kind of wish it was in the opposite order, and it's probably not too late to change this if there's pressing reason, but that's how things stand atm. > + } count_index; > + char name[1]; > +} QEMU_PACKED; > + > struct rtas_event_log_v6_hp { > #define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ > struct rtas_event_log_v6_section_header hdr; > @@ -187,12 +197,9 @@ struct rtas_event_log_v6_hp { > #define RTAS_LOG_V6_HP_ID_DRC_NAME 1 > #define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 > #define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 > +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 > uint8_t reserved; > - union { > - uint32_t index; > - uint32_t count; > - char name[1]; > - } drc; > + union drc_id drc_id; > } QEMU_PACKED; > > struct hp_log_full { > @@ -389,7 +396,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) > > static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, > sPAPRDRConnectorType drc_type, > - uint32_t drc) > + union drc_id *drc_id) > { > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > struct hp_log_full *new_hp; > @@ -446,9 +453,12 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, > } > > if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { > - hp->drc.count = cpu_to_be32(drc); > + hp->drc_id.count = cpu_to_be32(drc_id->count); > } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { > - hp->drc.index = cpu_to_be32(drc); > + hp->drc_id.index = cpu_to_be32(drc_id->index); > + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) { > + hp->drc_id.count_index.count = cpu_to_be32(drc_id->count_index.count); > + hp->drc_id.count_index.index = cpu_to_be32(drc_id->count_index.index); > } > > rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); > @@ -460,34 +470,53 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) > { > sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > sPAPRDRConnectorType drc_type = drck->get_type(drc); > - uint32_t index = drck->get_index(drc); > + union drc_id drc_id; I'd rather we used 'union drc_id id' or something. 
Having the typename and variable names be identical is a little confusing. > + drc_id.index = drck->get_index(drc); > > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, > - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index); > + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); > } > > void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc) > { > sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > sPAPRDRConnectorType drc_type = drck->get_type(drc); > - uint32_t index = drck->get_index(drc); > + union drc_id drc_id; > + drc_id.index = drck->get_index(drc); > > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, > - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index); > + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > } > > void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, > uint32_t count) > { > + union drc_id drc_id; > + drc_id.count = count; > + > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, > - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count); > + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); > } > > void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, > uint32_t count) > { > + union drc_id drc_id; > + drc_id.count = count; > + > spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, > - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count); > + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > +} > + > +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, > + uint32_t count, uint32_t index) > +{ > + union drc_id drc_id; > + drc_id.count_index.count = count; > + drc_id.count_index.index = index; > + > + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, > + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > } > > static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index 098d85d..f0c426b 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -585,6 +585,8 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, > uint32_t count); > void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, > uint32_t count); > +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, > + uint32_t count, uint32_t index); > > /* rtas-configure-connector state */ > struct sPAPRConfigureConnectorState { > -- > 2.1.0 > ^ permalink raw reply [flat|nested] 33+ messages in thread
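If the layout is changed to follow the current spec proposal as described
above (count before index), the union member would presumably end up as
something like this sketch, pending the PAPR change:

    /* Sketch only: field order per the in-progress spec proposal,
     * i.e. count first, then index. */
    struct count_index {
        uint32_t count;
        uint32_t index;
    } count_index;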
* [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-03-15 4:38 [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support Bharata B Rao 2016-03-15 4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao @ 2016-03-15 4:38 ` Bharata B Rao 2016-03-16 1:36 ` David Gibson 2016-05-27 15:48 ` [Qemu-devel] [RFC PATCH v2 0/2] " Thomas Huth 2 siblings, 1 reply; 33+ messages in thread From: Bharata B Rao @ 2016-03-15 4:38 UTC (permalink / raw) To: qemu-devel; +Cc: thuth, Bharata B Rao, mdroth, qemu-ppc, nfont, imammedo, david Add support to hot remove pc-dimm memory devices. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> --- hw/ppc/spapr.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++- hw/ppc/spapr_drc.c | 18 +++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 43708a2..cdf268a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2197,6 +2197,88 @@ out: error_propagate(errp, local_err); } +typedef struct sPAPRDIMMState { + uint32_t nr_lmbs; +} sPAPRDIMMState; + +static void spapr_lmb_release(DeviceState *dev, void *opaque) +{ + sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; + HotplugHandler *hotplug_ctrl = NULL; + + if (--ds->nr_lmbs) { + return; + } + + g_free(ds); + + /* + * Now that all the LMBs have been removed by the guest, call the + * pc-dimm unplug handler to cleanup up the pc-dimm device. + */ + hotplug_ctrl = qdev_get_hotplug_handler(dev); + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +} + +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, + Error **errp) +{ + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + int i; + sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); + uint32_t start_index; + + ds->nr_lmbs = nr_lmbs; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, spapr_lmb_release, ds, errp); + if (!i) { + start_index = drck->get_index(drc); + } + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, start_index); +} + +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + object_unparent(OBJECT(dev)); +} + +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + Error *local_err = NULL; + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + uint64_t addr; + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + + spapr_del_lmbs(dev, addr, size, &error_abort); +out: + error_propagate(errp, local_err); +} + static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2244,7 +2326,15 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { if (object_dynamic_cast(OBJECT(dev), 
TYPE_PC_DIMM)) { - error_setg(errp, "Memory hot unplug not supported by sPAPR"); + spapr_memory_unplug(hotplug_dev, dev, errp); + } +} + +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); } } @@ -2293,6 +2383,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + hc->unplug_request = spapr_machine_device_unplug_request; smc->dr_lmb_enabled = true; fwc->get_dev_path = spapr_get_fw_dev_path; diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index ef063c0..740b9d4 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "hw/ppc/spapr_drc.h" +#include "hw/ppc/spapr.h" #include "qom/object.h" #include "hw/qdev.h" #include "qapi/visitor.h" @@ -78,6 +79,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, } } + /* + * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't + * belong to a DIMM device that is marked for removal. + * + * Currently the guest userspace tool drmgr that drives the memory + * hotplug/unplug will just try to remove a set of 'removable' LMBs + * in response to a hot unplug request that is based on drc-count. + * If the LMB being removed doesn't belong to a DIMM device that is + * actually being unplugged, fail the isolation request here. + */ + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) { + if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + !drc->awaiting_release) { + return RTAS_OUT_HW_ERROR; + } + } + drc->isolation_state = state; if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { -- 2.1.0 ^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-03-15 4:38 ` [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support Bharata B Rao @ 2016-03-16 1:36 ` David Gibson 2016-03-16 4:41 ` Bharata B Rao 0 siblings, 1 reply; 33+ messages in thread From: David Gibson @ 2016-03-16 1:36 UTC (permalink / raw) To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo [-- Attachment #1: Type: text/plain, Size: 6520 bytes --] On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > Add support to hot remove pc-dimm memory devices. > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Looks correct, but again, needs to wait on the PAPR change. Have you thought any further on the idea of sending an index message, then a count message as an interim approach to fixing this without requiring a PAPR change? > --- > hw/ppc/spapr.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_drc.c | 18 +++++++++++ > 2 files changed, 110 insertions(+), 1 deletion(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 43708a2..cdf268a 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -2197,6 +2197,88 @@ out: > error_propagate(errp, local_err); > } > > +typedef struct sPAPRDIMMState { > + uint32_t nr_lmbs; > +} sPAPRDIMMState; > + > +static void spapr_lmb_release(DeviceState *dev, void *opaque) > +{ > + sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; > + HotplugHandler *hotplug_ctrl = NULL; > + > + if (--ds->nr_lmbs) { > + return; > + } > + > + g_free(ds); > + > + /* > + * Now that all the LMBs have been removed by the guest, call the > + * pc-dimm unplug handler to cleanup up the pc-dimm device. > + */ > + hotplug_ctrl = qdev_get_hotplug_handler(dev); > + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); > +} > + > +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, > + Error **errp) > +{ > + sPAPRDRConnector *drc; > + sPAPRDRConnectorClass *drck; > + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; > + int i; > + sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); > + uint32_t start_index; > + > + ds->nr_lmbs = nr_lmbs; > + for (i = 0; i < nr_lmbs; i++) { > + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, > + addr / SPAPR_MEMORY_BLOCK_SIZE); > + g_assert(drc); > + > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > + drck->detach(drc, dev, spapr_lmb_release, ds, errp); > + if (!i) { > + start_index = drck->get_index(drc); > + } > + addr += SPAPR_MEMORY_BLOCK_SIZE; > + } > + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, > + nr_lmbs, start_index); > +} > + > +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, > + Error **errp) > +{ > + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); > + PCDIMMDevice *dimm = PC_DIMM(dev); > + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > + MemoryRegion *mr = ddc->get_memory_region(dimm); > + > + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); > + object_unparent(OBJECT(dev)); > +} > + > +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + Error *local_err = NULL; > + PCDIMMDevice *dimm = PC_DIMM(dev); > + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); > + MemoryRegion *mr = ddc->get_memory_region(dimm); > + uint64_t size = memory_region_size(mr); > + uint64_t addr; > + > + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); > + if 
(local_err) { > + goto out; > + } > + > + spapr_del_lmbs(dev, addr, size, &error_abort); > +out: > + error_propagate(errp, local_err); > +} > + > static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > DeviceState *dev, Error **errp) > { > @@ -2244,7 +2326,15 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, > DeviceState *dev, Error **errp) > { > if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { > - error_setg(errp, "Memory hot unplug not supported by sPAPR"); > + spapr_memory_unplug(hotplug_dev, dev, errp); > + } > +} > + > +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { > + spapr_memory_unplug_request(hotplug_dev, dev, errp); > } > } > > @@ -2293,6 +2383,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) > hc->plug = spapr_machine_device_plug; > hc->unplug = spapr_machine_device_unplug; > mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; > + hc->unplug_request = spapr_machine_device_unplug_request; > > smc->dr_lmb_enabled = true; > fwc->get_dev_path = spapr_get_fw_dev_path; > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > index ef063c0..740b9d4 100644 > --- a/hw/ppc/spapr_drc.c > +++ b/hw/ppc/spapr_drc.c > @@ -12,6 +12,7 @@ > > #include "qemu/osdep.h" > #include "hw/ppc/spapr_drc.h" > +#include "hw/ppc/spapr.h" > #include "qom/object.h" > #include "hw/qdev.h" > #include "qapi/visitor.h" > @@ -78,6 +79,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, > } > } > > + /* > + * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't > + * belong to a DIMM device that is marked for removal. > + * > + * Currently the guest userspace tool drmgr that drives the memory > + * hotplug/unplug will just try to remove a set of 'removable' LMBs > + * in response to a hot unplug request that is based on drc-count. > + * If the LMB being removed doesn't belong to a DIMM device that is > + * actually being unplugged, fail the isolation request here. > + */ > + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) { > + if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && > + !drc->awaiting_release) { > + return RTAS_OUT_HW_ERROR; > + } > + } > + > drc->isolation_state = state; > > if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 819 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-16  1:36 ` David Gibson
@ 2016-03-16  4:41 ` Bharata B Rao
  2016-03-16  5:11 ` David Gibson
  ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: Bharata B Rao @ 2016-03-16 4:41 UTC (permalink / raw)
To: David Gibson; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo

On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > Add support to hot remove pc-dimm memory devices.
> >
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>
> Looks correct, but again, needs to wait on the PAPR change.
>
> Have you thought any further on the idea of sending an index message,
> then a count message as an interim approach to fixing this without
> requiring a PAPR change?

Removal by index and removal by count are valid messages by themselves,
and drmgr would go ahead and start the removal in response to those
calls. IIUC, you are suggesting that we remove one LMB by index in
response to the 1st message and remove (count - 1) LMBs from where the
last removal was done in the previous message.

Since the same code base of powerpc-utils works on PowerVM too, I am
not sure if such an approach would impact PowerVM in any undesirable
manner. Maybe Nathan can clarify?

I see that this can be done, but the changes in the drmgr code,
especially the code related to LMB list handling/removal, can be
non-trivial. So I am not sure the temporary approach is worth it here,
and hence I feel it is better to wait and do it the count-indexed way.

While we are here, I would also like to get some opinion on the real
need for memory unplug. Is there anything that memory unplug gives us
which memory ballooning (shrinking mem via ballooning) can't give?

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-03-16 4:41 ` Bharata B Rao @ 2016-03-16 5:11 ` David Gibson 2016-03-23 3:22 ` David Gibson 2016-04-25 9:20 ` Igor Mammedov 2 siblings, 0 replies; 33+ messages in thread From: David Gibson @ 2016-03-16 5:11 UTC (permalink / raw) To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo [-- Attachment #1: Type: text/plain, Size: 2426 bytes --] On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote: > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > Add support to hot remove pc-dimm memory devices. > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > Looks correct, but again, needs to wait on the PAPR change. > > > > Have you thought any further on the idea of sending an index message, > > then a count message as an interim approach to fixing this without > > requiring a PAPR change? > > Removal by index and removal by count are valid messages by themselves > and drmgr would go ahead and start the removal in reponse to those > calls. IIUC, you are suggesting that lets remove one LMB by index in > response to 1st message and remove (count -1) LMBs from where the last > removal was done in the previous message. That's right. > Since the same code base of powerpc-utils works on PowerVM too, I am not > sure if such an approach would impact PowerVM in any undesirable manner. > May be Nathan can clarify ? Ah.. My first guess would be that it's ok; since IIUC PowerVM doesn't care where the LMBs are removed from, removing them starting from the last place we removed something should be as good as anywhere. But it's possible there's some issue I haven't considered. > I see that this can be done, but the changes in drmgr code specially the > code related to LMB list handling/removal can be non-trivial. So not sure > if the temporary approach is all that worth here and hence I feel it is better > to wait and do it the count-indexed way. Ok. It seems like it ought to be fairly straightforward, but I don't know the drmgr code, so.. It would certainly be useful if Nathan could chime in on this. > While we are here, I would also like to get some opinion on the real > need for memory unplug. Is there anything that memory unplug gives us > which memory ballooning (shrinking mem via ballooning) can't give ? That's.. a good question. I guess it means avoiding another interface and a pseudo-device at least. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 819 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-03-16 4:41 ` Bharata B Rao 2016-03-16 5:11 ` David Gibson @ 2016-03-23 3:22 ` David Gibson 2016-03-24 14:15 ` Nathan Fontenot 2016-04-25 9:20 ` Igor Mammedov 2 siblings, 1 reply; 33+ messages in thread From: David Gibson @ 2016-03-23 3:22 UTC (permalink / raw) To: Bharata B Rao; +Cc: thuth, qemu-devel, mdroth, qemu-ppc, nfont, imammedo [-- Attachment #1: Type: text/plain, Size: 2336 bytes --] On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote: > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > Add support to hot remove pc-dimm memory devices. > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > Looks correct, but again, needs to wait on the PAPR change. > > > > Have you thought any further on the idea of sending an index message, > > then a count message as an interim approach to fixing this without > > requiring a PAPR change? > > Removal by index and removal by count are valid messages by themselves > and drmgr would go ahead and start the removal in reponse to those > calls. IIUC, you are suggesting that lets remove one LMB by index in > response to 1st message and remove (count -1) LMBs from where the last > removal was done in the previous message. Yes, that's the idea. > Since the same code base of powerpc-utils works on PowerVM too, I am not > sure if such an approach would impact PowerVM in any undesirable manner. > May be Nathan can clarify ? Heard anything from Nathan? I don't really see how it would be bad under PowerVM. Under PowerVM it generally doesn't matter which LMBs you remove, right? So removing the ones immediately after the last one you removed should be as good a choice as any. > I see that this can be done, but the changes in drmgr code specially the > code related to LMB list handling/removal can be non-trivial. So not sure > if the temporary approach is all that worth here and hence I feel it is better > to wait and do it the count-indexed way. Really? drmgr is already scanning LMBs to find ones it can remove. Seeding that scan with the last removed LMB shouldn't be too hard. > While we are here, I would also like to get some opinion on the real > need for memory unplug. Is there anything that memory unplug gives us > which memory ballooning (shrinking mem via ballooning) can't give ? Hmm.. that's an interesting question. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 819 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-03-23 3:22 ` David Gibson @ 2016-03-24 14:15 ` Nathan Fontenot 2016-03-29 4:41 ` David Gibson 0 siblings, 1 reply; 33+ messages in thread From: Nathan Fontenot @ 2016-03-24 14:15 UTC (permalink / raw) To: David Gibson, Bharata B Rao; +Cc: imammedo, thuth, qemu-ppc, qemu-devel, mdroth On 03/22/2016 10:22 PM, David Gibson wrote: > On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote: >> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: >>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: >>>> Add support to hot remove pc-dimm memory devices. >>>> >>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> >>> >>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> >>> >>> Looks correct, but again, needs to wait on the PAPR change. >>> >>> Have you thought any further on the idea of sending an index message, >>> then a count message as an interim approach to fixing this without >>> requiring a PAPR change? >> >> Removal by index and removal by count are valid messages by themselves >> and drmgr would go ahead and start the removal in reponse to those >> calls. IIUC, you are suggesting that lets remove one LMB by index in >> response to 1st message and remove (count -1) LMBs from where the last >> removal was done in the previous message. > > Yes, that's the idea. > >> Since the same code base of powerpc-utils works on PowerVM too, I am not >> sure if such an approach would impact PowerVM in any undesirable manner. >> May be Nathan can clarify ? The issue I see with this approach is that there is no way in the current drmgr code to correlate the two memory remove requests. If I understand what you are asking to do correctly, this would result in two separate invocations of drmgr. The first to remove a specific LMB by index, this index then needing to be saved somewhere, then a second invocation that would retrieve the index and remove count-1 LMBs. Would there be anything tying these two requests together? or would we assume that two requests received in this order are correlated? What happens if another request comes in in between these two requests? I see this as being a pretty rare possibility, but it is a possibility. > > Heard anything from Nathan? I don't really see how it would be bad > under PowerVM. Under PowerVM it generally doesn't matter which LMBs > you remove, right? So removing the ones immediately after the last > one you removed should be as good a choice as any. This shouldn't hurt anything for PowerVM systems. In general the only time a specific LMB is specified for PowerVM systems is on memory guard operations. > >> I see that this can be done, but the changes in drmgr code specially the >> code related to LMB list handling/removal can be non-trivial. So not sure >> if the temporary approach is all that worth here and hence I feel it is better >> to wait and do it the count-indexed way. > > Really? drmgr is already scanning LMBs to find ones it can remove. > Seeding that scan with the last removed LMB shouldn't be too hard. This shouldn't be difficult to implement in the drmgr code. We already search a list of LMBs to find ones to remove, updating to just return the LMB with the next sequential index shouldn't be difficult. -Nathan > >> While we are here, I would also like to get some opinion on the real >> need for memory unplug. Is there anything that memory unplug gives us >> which memory ballooning (shrinking mem via ballooning) can't give ? > > Hmm.. 
that's an interesting question. > ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-03-24 14:15 ` Nathan Fontenot @ 2016-03-29 4:41 ` David Gibson 0 siblings, 0 replies; 33+ messages in thread From: David Gibson @ 2016-03-29 4:41 UTC (permalink / raw) To: Nathan Fontenot Cc: thuth, mdroth, qemu-devel, qemu-ppc, Bharata B Rao, imammedo [-- Attachment #1: Type: text/plain, Size: 4560 bytes --] On Thu, Mar 24, 2016 at 09:15:58AM -0500, Nathan Fontenot wrote: > On 03/22/2016 10:22 PM, David Gibson wrote: > > On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote: > >> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > >>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > >>>> Add support to hot remove pc-dimm memory devices. > >>>> > >>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > >>> > >>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > >>> > >>> Looks correct, but again, needs to wait on the PAPR change. > >>> > >>> Have you thought any further on the idea of sending an index message, > >>> then a count message as an interim approach to fixing this without > >>> requiring a PAPR change? > >> > >> Removal by index and removal by count are valid messages by themselves > >> and drmgr would go ahead and start the removal in reponse to those > >> calls. IIUC, you are suggesting that lets remove one LMB by index in > >> response to 1st message and remove (count -1) LMBs from where the last > >> removal was done in the previous message. > > > > Yes, that's the idea. > > > >> Since the same code base of powerpc-utils works on PowerVM too, I am not > >> sure if such an approach would impact PowerVM in any undesirable manner. > >> May be Nathan can clarify ? > > The issue I see with this approach is that there is no way in the current > drmgr code to correlate the two memory remove requests. If I understand > what you are asking to do correctly, this would result in two separate > invocations of drmgr. The first to remove a specific LMB by index, this > index then needing to be saved somewhere, then a second invocation that > would retrieve the index and remove count-1 LMBs. Ah.. yes.. I had forgotten that this would be two separate drmgr invocations, and therefore we'd need a way to carry data between them. That does complicate this rather. > Would there be anything tying these two requests together? or would we > assume that two requests received in this order are correlated? My assumption was that it would be based simply on order. > What happens if another request comes in in between these two requests? > I see this as being a pretty rare possibility, but it is a possibility. I'm not sure it actually is possible under KVM - I think the qemu side processes the requests synchronously. I'm not 100% certain about that though. The plan was that the qemu HV would not permit LMBs to be removed if they're not the ones that are supposed to be removed, and so drmgr would keep scanning until it finds the right ones. So, even if the request order is jumbled, the behaviour should be still technically correct - it could be *very* slow though as drmgr might end up vacating (piece by piece) large areas of the guest's RAM while it scans for the right LMBs to remove. > > Heard anything from Nathan? I don't really see how it would be bad > > under PowerVM. Under PowerVM it generally doesn't matter which LMBs > > you remove, right? So removing the ones immediately after the last > > one you removed should be as good a choice as any. 
> > This shouldn't hurt anything for PowerVM systems. In general the only > time a specific LMB is specified for PowerVM systems is on memory guard > operations. Ok. > >> I see that this can be done, but the changes in drmgr code specially the > >> code related to LMB list handling/removal can be non-trivial. So not sure > >> if the temporary approach is all that worth here and hence I feel it is better > >> to wait and do it the count-indexed way. > > > > Really? drmgr is already scanning LMBs to find ones it can remove. > > Seeding that scan with the last removed LMB shouldn't be too hard. > > This shouldn't be difficult to implement in the drmgr code. We already > search a list of LMBs to find ones to remove, updating to just return > the LMB with the next sequential index shouldn't be difficult. > > -Nathan > > > > >> While we are here, I would also like to get some opinion on the real > >> need for memory unplug. Is there anything that memory unplug gives us > >> which memory ballooning (shrinking mem via ballooning) can't give ? > > > > Hmm.. that's an interesting question. > > > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 819 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-03-16  4:41 ` Bharata B Rao
  2016-03-16  5:11 ` David Gibson
  2016-03-23  3:22 ` David Gibson
@ 2016-04-25  9:20 ` Igor Mammedov
  2016-04-26  5:09 ` Bharata B Rao
  2 siblings, 1 reply; 33+ messages in thread
From: Igor Mammedov @ 2016-04-25 9:20 UTC (permalink / raw)
To: Bharata B Rao; +Cc: David Gibson, thuth, qemu-devel, mdroth, qemu-ppc, nfont

On Wed, 16 Mar 2016 10:11:54 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > > Add support to hot remove pc-dimm memory devices.
> > >
> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> >
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >
> > Looks correct, but again, needs to wait on the PAPR change.
[...]
>
> While we are here, I would also like to get some opinion on the real
> need for memory unplug. Is there anything that memory unplug gives us
> which memory ballooning (shrinking mem via ballooning) can't give?

Sure, ballooning can complement memory hotplug, but turning it on would
effectively reduce hotplug to ballooning, as it would enable overcommit
capability instead of the hard partitioning that pc-dimms provide. So
one could just use ballooning only and not bother with hotplug at all.

On the other hand, memory hotplug/unplug (at least on x86) tries to
model real hardware, thus removing the need for a paravirt ballooning
solution in favor of native guest support.

PS:
Guest-wise, hot-unplug is currently not well supported in Linux, i.e.
it is not guaranteed that the guest will honor an unplug request, as it
may pin a DIMM by using it as non-migratable memory. So there is still
work to do on the guest side to make unplug more reliable/guaranteed.

>
> Regards,
> Bharata.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-25  9:20 ` Igor Mammedov
@ 2016-04-26  5:09 ` Bharata B Rao
  2016-04-26  7:52 ` Igor Mammedov
  0 siblings, 1 reply; 33+ messages in thread
From: Bharata B Rao @ 2016-04-26 5:09 UTC (permalink / raw)
To: Igor Mammedov; +Cc: David Gibson, thuth, qemu-devel, mdroth, qemu-ppc, nfont

On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:
> On Wed, 16 Mar 2016 10:11:54 +0530
> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
>
> > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > > > Add support to hot remove pc-dimm memory devices.
> > > >
> > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > >
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > >
> > > Looks correct, but again, needs to wait on the PAPR change.
> [...]
> >
> > While we are here, I would also like to get some opinion on the real
> > need for memory unplug. Is there anything that memory unplug gives us
> > which memory ballooning (shrinking mem via ballooning) can't give?
> Sure, ballooning can complement memory hotplug, but turning it on would
> effectively reduce hotplug to ballooning, as it would enable overcommit
> capability instead of the hard partitioning that pc-dimms provide. So
> one could just use ballooning only and not bother with hotplug at all.
>
> On the other hand, memory hotplug/unplug (at least on x86) tries to
> model real hardware, thus removing the need for a paravirt ballooning
> solution in favor of native guest support.

Thanks for your views.

>
> PS:
> Guest-wise, hot-unplug is currently not well supported in Linux, i.e.
> it is not guaranteed that the guest will honor an unplug request, as it
> may pin a DIMM by using it as non-migratable memory. So there is still
> work to do on the guest side to make unplug more reliable/guaranteed.

In the above scenario where the guest doesn't allow removal of certain
parts of DIMM memory, what is the expected behaviour as far as the QEMU
DIMM device is concerned? I seem to be running into this situation very
often with PowerPC mem unplug, where I am left with a DIMM device that
has only some memory blocks released. In this situation, I would like
to block further unplug requests on the same device, but QEMU seems to
allow more such unplug requests to come in via the monitor. So qdev
won't help me here? Should I detect such a condition from the machine
unplug() handler and take the required action?

On x86, if some pages are offlined and subsequently other pages couldn't
be offlined, then I see the full DIMM memory size remaining with the
guest. So I infer that on x86, QEMU memory unplug either removes the
full DIMM or nothing. Is that understanding correct?

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-26  5:09 ` Bharata B Rao
@ 2016-04-26  7:52 ` Igor Mammedov
  2016-04-26 21:03 ` Michael Roth
  0 siblings, 1 reply; 33+ messages in thread
From: Igor Mammedov @ 2016-04-26 7:52 UTC (permalink / raw)
To: Bharata B Rao; +Cc: David Gibson, thuth, qemu-devel, mdroth, qemu-ppc, nfont

On Tue, 26 Apr 2016 10:39:23 +0530
Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote:
> > On Wed, 16 Mar 2016 10:11:54 +0530
> > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> >
> > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > > > > Add support to hot remove pc-dimm memory devices.
> > > > >
> > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > > >
> > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > >
> > > > Looks correct, but again, needs to wait on the PAPR change.
> > [...]
> > >
> > > While we are here, I would also like to get some opinion on the real
> > > need for memory unplug. Is there anything that memory unplug gives us
> > > which memory ballooning (shrinking mem via ballooning) can't give?
> > Sure, ballooning can complement memory hotplug, but turning it on would
> > effectively reduce hotplug to ballooning, as it would enable overcommit
> > capability instead of the hard partitioning that pc-dimms provide. So
> > one could just use ballooning only and not bother with hotplug at all.
> >
> > On the other hand, memory hotplug/unplug (at least on x86) tries to
> > model real hardware, thus removing the need for a paravirt ballooning
> > solution in favor of native guest support.
>
> Thanks for your views.
>
> >
> > PS:
> > Guest-wise, hot-unplug is currently not well supported in Linux, i.e.
> > it is not guaranteed that the guest will honor an unplug request, as it
> > may pin a DIMM by using it as non-migratable memory. So there is still
> > work to do on the guest side to make unplug more reliable/guaranteed.
>
> In the above scenario where the guest doesn't allow removal of certain
> parts of DIMM memory, what is the expected behaviour as far as the QEMU
> DIMM device is concerned? I seem to be running into this situation very
> often with PowerPC mem unplug, where I am left with a DIMM device that
> has only some memory blocks released. In this situation, I would like
> to block further unplug requests on the same device, but QEMU seems to
> allow more such unplug requests to come in via the monitor. So qdev
> won't help me here? Should I detect such a condition from the machine
> unplug() handler and take the required action?
I think offlining is the guest's task, along with recovering from an
inability to offline (i.e. offline all + eject, or restore the original
state). QEMU does its job by notifying the guest which DIMM it wants to
remove and removing it when the guest asks it to (at least in the x86
world).

>
> On x86, if some pages are offlined and subsequently other pages couldn't
> be offlined, then I see the full DIMM memory size remaining with the
> guest. So I infer that on x86, QEMU memory unplug either removes the
> full DIMM or nothing. Is that understanding correct?
I wouldn't bet that it's guaranteed behavior, but it should work this
way.

>
> Regards,
> Bharata.

^ permalink raw reply	[flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-26 7:52 ` Igor Mammedov @ 2016-04-26 21:03 ` Michael Roth 2016-04-27 6:54 ` Thomas Huth ` (2 more replies) 0 siblings, 3 replies; 33+ messages in thread From: Michael Roth @ 2016-04-26 21:03 UTC (permalink / raw) To: Igor Mammedov, Bharata B Rao Cc: David Gibson, thuth, qemu-devel, qemu-ppc, nfont Quoting Igor Mammedov (2016-04-26 02:52:36) > On Tue, 26 Apr 2016 10:39:23 +0530 > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > > On Wed, 16 Mar 2016 10:11:54 +0530 > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > > > > Add support to hot remove pc-dimm memory devices. > > > > > > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > > > > > > > Looks correct, but again, needs to wait on the PAPR change. > > > [...] > > > > > > > > While we are here, I would also like to get some opinion on the real > > > > need for memory unplug. Is there anything that memory unplug gives us > > > > which memory ballooning (shrinking mem via ballooning) can't give ? > > > Sure ballooning can complement memory hotplug but turning it on would > > > effectively reduce hotplug to balloning as it would enable overcommit > > > capability instead of hard partitioning pc-dimms provides. So one > > > could just use ballooning only and not bother with hotplug at all. > > > > > > On the other hand memory hotplug/unplug (at least on x86) tries > > > to model real hardware, thus removing need in paravirt ballooning > > > solution in favor of native guest support. > > > > Thanks for your views. > > > > > > > > PS: > > > Guest wise, currently hot-unplug is not well supported in linux, > > > i.e. it's not guarantied that guest will honor unplug request > > > as it may pin dimm by using it as a non migratable memory. So > > > there is something to work on guest side to make unplug more > > > reliable/guarantied. > > > > In the above scenario where the guest doesn't allow removal of certain > > parts of DIMM memory, what is the expected behaviour as far as QEMU > > DIMM device is concerned ? I seem to be running into this situation > > very often with PowerPC mem unplug where I am left with a DIMM device > > that has only some memory blocks released. In this situation, I would like > > to block further unplug requests on the same device, but QEMU seems > > to allow more such unplug requests to come in via the monitor. So > > qdev won't help me here ? Should I detect such condition from the > > machine unplug() handler and take required action ? > I think offlining is a guests task along with recovering from > inability to offline (i.e. offline all + eject or restore original state). > QUEM does it's job by notifying guest what dimm it wants to remove > and removes it when guest asks it (at least in x86 world). In the case of pseries, the DIMM abstraction isn't really exposed to the guest, but rather the memory blocks we use to make the backing memdev memory available to the guest. During unplug, the guest completely releases these blocks back to QEMU, and if it can only release a subset of what's requested it does not attempt to recover. 
We can potentially change that behavior on the guest side, since partially-freed DIMMs aren't currently useful on the host-side... But, in the case of pseries, I wonder if it makes sense to maybe go ahead and MADV_DONTNEED the ranges backing these released blocks so the host can at least partially reclaim the memory from a partially unplugged DIMM? > > > > > On x86, if some pages are offlined and subsequently other pages couldn't > > be offlined, then I see the full DIMM memory size remaining > > with the guest. So I infer that on x86, QEMU memory unplug either > > removes full DIMM or nothing. Is that understanding correct ? > I wouldn't bet that it's guarantied behavior but it should be this way. > > > > > Regards, > > Bharata. > > > ^ permalink raw reply [flat|nested] 33+ messages in thread
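To make the MADV_DONTNEED suggestion above concrete, here is a minimal sketch of the host-side call, assuming the host virtual address and length of a released block are already known. The helper name and both parameters are illustrative only, not existing QEMU APIs; in a real implementation the address would have to be looked up through the RAM block backing the DIMM's memdev, and whether this is usable for hugepage-backed memdevs would need checking.

/*
 * Sketch only (not QEMU code): ask the host kernel to reclaim the pages
 * backing one memory block the guest has released.  "hva" and "block_size"
 * are assumed to be the block's host virtual address and length.
 */
#include <stdio.h>
#include <sys/mman.h>

static int reclaim_released_block(void *hva, size_t block_size)
{
    /*
     * For anonymous/private backing memory the pages are freed; the
     * mapping stays valid and reads back as zero on the next access.
     */
    if (madvise(hva, block_size, MADV_DONTNEED) < 0) {
        perror("madvise(MADV_DONTNEED)");
        return -1;
    }
    return 0;
}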
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-26 21:03 ` Michael Roth @ 2016-04-27 6:54 ` Thomas Huth 2016-04-27 13:37 ` Igor Mammedov 2016-04-29 3:24 ` David Gibson 2 siblings, 0 replies; 33+ messages in thread From: Thomas Huth @ 2016-04-27 6:54 UTC (permalink / raw) To: Michael Roth, Igor Mammedov, Bharata B Rao Cc: nfont, qemu-ppc, qemu-devel, David Gibson On 26.04.2016 23:03, Michael Roth wrote: > Quoting Igor Mammedov (2016-04-26 02:52:36) >> On Tue, 26 Apr 2016 10:39:23 +0530 >> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: >> >>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: >>>> On Wed, 16 Mar 2016 10:11:54 +0530 >>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: >>>> >>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: >>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: >>>>>>> Add support to hot remove pc-dimm memory devices. >>>>>>> >>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> >>>>>> >>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> >>>>>> >>>>>> Looks correct, but again, needs to wait on the PAPR change. >>>> [...] >>>>> >>>>> While we are here, I would also like to get some opinion on the real >>>>> need for memory unplug. Is there anything that memory unplug gives us >>>>> which memory ballooning (shrinking mem via ballooning) can't give ? >>>> Sure ballooning can complement memory hotplug but turning it on would >>>> effectively reduce hotplug to balloning as it would enable overcommit >>>> capability instead of hard partitioning pc-dimms provides. So one >>>> could just use ballooning only and not bother with hotplug at all. >>>> >>>> On the other hand memory hotplug/unplug (at least on x86) tries >>>> to model real hardware, thus removing need in paravirt ballooning >>>> solution in favor of native guest support. >>> >>> Thanks for your views. >>> >>>> >>>> PS: >>>> Guest wise, currently hot-unplug is not well supported in linux, >>>> i.e. it's not guarantied that guest will honor unplug request >>>> as it may pin dimm by using it as a non migratable memory. So >>>> there is something to work on guest side to make unplug more >>>> reliable/guarantied. >>> >>> In the above scenario where the guest doesn't allow removal of certain >>> parts of DIMM memory, what is the expected behaviour as far as QEMU >>> DIMM device is concerned ? I seem to be running into this situation >>> very often with PowerPC mem unplug where I am left with a DIMM device >>> that has only some memory blocks released. In this situation, I would like >>> to block further unplug requests on the same device, but QEMU seems >>> to allow more such unplug requests to come in via the monitor. So >>> qdev won't help me here ? Should I detect such condition from the >>> machine unplug() handler and take required action ? >> I think offlining is a guests task along with recovering from >> inability to offline (i.e. offline all + eject or restore original state). >> QUEM does it's job by notifying guest what dimm it wants to remove >> and removes it when guest asks it (at least in x86 world). > > In the case of pseries, the DIMM abstraction isn't really exposed to > the guest, but rather the memory blocks we use to make the backing > memdev memory available to the guest. During unplug, the guest > completely releases these blocks back to QEMU, and if it can only > release a subset of what's requested it does not attempt to recover. 
> We can potentially change that behavior on the guest side, since > partially-freed DIMMs aren't currently useful on the host-side... > > But, in the case of pseries, I wonder if it makes sense to maybe go > ahead and MADV_DONTNEED the ranges backing these released blocks so the > host can at least partially reclaim the memory from a partially > unplugged DIMM? Sounds like this could be a good compromise. Thomas ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-26 21:03 ` Michael Roth 2016-04-27 6:54 ` Thomas Huth @ 2016-04-27 13:37 ` Igor Mammedov 2016-04-27 13:59 ` Thomas Huth ` (2 more replies) 2016-04-29 3:24 ` David Gibson 2 siblings, 3 replies; 33+ messages in thread From: Igor Mammedov @ 2016-04-27 13:37 UTC (permalink / raw) To: Michael Roth Cc: Bharata B Rao, David Gibson, thuth, qemu-devel, qemu-ppc, nfont On Tue, 26 Apr 2016 16:03:37 -0500 Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > Quoting Igor Mammedov (2016-04-26 02:52:36) > > On Tue, 26 Apr 2016 10:39:23 +0530 > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > > > On Wed, 16 Mar 2016 10:11:54 +0530 > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > > > > > Add support to hot remove pc-dimm memory devices. > > > > > > > > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > > > > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > > > > > > > > > Looks correct, but again, needs to wait on the PAPR change. > > > > [...] > > > > > > > > > > While we are here, I would also like to get some opinion on the real > > > > > need for memory unplug. Is there anything that memory unplug gives us > > > > > which memory ballooning (shrinking mem via ballooning) can't give ? > > > > Sure ballooning can complement memory hotplug but turning it on would > > > > effectively reduce hotplug to balloning as it would enable overcommit > > > > capability instead of hard partitioning pc-dimms provides. So one > > > > could just use ballooning only and not bother with hotplug at all. > > > > > > > > On the other hand memory hotplug/unplug (at least on x86) tries > > > > to model real hardware, thus removing need in paravirt ballooning > > > > solution in favor of native guest support. > > > > > > Thanks for your views. > > > > > > > > > > > PS: > > > > Guest wise, currently hot-unplug is not well supported in linux, > > > > i.e. it's not guarantied that guest will honor unplug request > > > > as it may pin dimm by using it as a non migratable memory. So > > > > there is something to work on guest side to make unplug more > > > > reliable/guarantied. > > > > > > In the above scenario where the guest doesn't allow removal of certain > > > parts of DIMM memory, what is the expected behaviour as far as QEMU > > > DIMM device is concerned ? I seem to be running into this situation > > > very often with PowerPC mem unplug where I am left with a DIMM device > > > that has only some memory blocks released. In this situation, I would like > > > to block further unplug requests on the same device, but QEMU seems > > > to allow more such unplug requests to come in via the monitor. So > > > qdev won't help me here ? Should I detect such condition from the > > > machine unplug() handler and take required action ? > > I think offlining is a guests task along with recovering from > > inability to offline (i.e. offline all + eject or restore original state). > > QUEM does it's job by notifying guest what dimm it wants to remove > > and removes it when guest asks it (at least in x86 world). 
> > In the case of pseries, the DIMM abstraction isn't really exposed to > the guest, but rather the memory blocks we use to make the backing > memdev memory available to the guest. During unplug, the guest > completely releases these blocks back to QEMU, and if it can only > release a subset of what's requested it does not attempt to recover. > We can potentially change that behavior on the guest side, since > partially-freed DIMMs aren't currently useful on the host-side... > > But, in the case of pseries, I wonder if it makes sense to maybe go > ahead and MADV_DONTNEED the ranges backing these released blocks so the > host can at least partially reclaim the memory from a partially > unplugged DIMM? It's a little bit confusing: one asked to remove the device, but it's still there and not completely usable/available. What will happen when the user wants that memory plugged back? It looks like reinventing ballooning; maybe it would be better to disable unplug and use ballooning to release some memory, until the guest is ready to unplug all or none of the requested blocks? > > > > > > > > > On x86, if some pages are offlined and subsequently other pages couldn't > > > be offlined, then I see the full DIMM memory size remaining > > > with the guest. So I infer that on x86, QEMU memory unplug either > > > removes full DIMM or nothing. Is that understanding correct ? > > I wouldn't bet that it's guarantied behavior but it should be this way. > > > > > > > > Regards, > > > Bharata. > > > > > > ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-27 13:37 ` Igor Mammedov @ 2016-04-27 13:59 ` Thomas Huth 2016-04-27 14:34 ` Igor Mammedov 2016-04-27 14:24 ` Bharata B Rao 2016-04-29 3:28 ` David Gibson 2 siblings, 1 reply; 33+ messages in thread From: Thomas Huth @ 2016-04-27 13:59 UTC (permalink / raw) To: Igor Mammedov, Michael Roth Cc: Bharata B Rao, David Gibson, qemu-devel, qemu-ppc, nfont On 27.04.2016 15:37, Igor Mammedov wrote: > On Tue, 26 Apr 2016 16:03:37 -0500 > Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > >> Quoting Igor Mammedov (2016-04-26 02:52:36) >>> On Tue, 26 Apr 2016 10:39:23 +0530 >>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: >>> >>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: >>>>> On Wed, 16 Mar 2016 10:11:54 +0530 >>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: >>>>> >>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: >>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: >>>>>>>> Add support to hot remove pc-dimm memory devices. >>>>>>>> >>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> >>>>>>> >>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> >>>>>>> >>>>>>> Looks correct, but again, needs to wait on the PAPR change. >>>>> [...] >>>>>> >>>>>> While we are here, I would also like to get some opinion on the real >>>>>> need for memory unplug. Is there anything that memory unplug gives us >>>>>> which memory ballooning (shrinking mem via ballooning) can't give ? >>>>> Sure ballooning can complement memory hotplug but turning it on would >>>>> effectively reduce hotplug to balloning as it would enable overcommit >>>>> capability instead of hard partitioning pc-dimms provides. So one >>>>> could just use ballooning only and not bother with hotplug at all. >>>>> >>>>> On the other hand memory hotplug/unplug (at least on x86) tries >>>>> to model real hardware, thus removing need in paravirt ballooning >>>>> solution in favor of native guest support. >>>> >>>> Thanks for your views. >>>> >>>>> >>>>> PS: >>>>> Guest wise, currently hot-unplug is not well supported in linux, >>>>> i.e. it's not guarantied that guest will honor unplug request >>>>> as it may pin dimm by using it as a non migratable memory. So >>>>> there is something to work on guest side to make unplug more >>>>> reliable/guarantied. >>>> >>>> In the above scenario where the guest doesn't allow removal of certain >>>> parts of DIMM memory, what is the expected behaviour as far as QEMU >>>> DIMM device is concerned ? I seem to be running into this situation >>>> very often with PowerPC mem unplug where I am left with a DIMM device >>>> that has only some memory blocks released. In this situation, I would like >>>> to block further unplug requests on the same device, but QEMU seems >>>> to allow more such unplug requests to come in via the monitor. So >>>> qdev won't help me here ? Should I detect such condition from the >>>> machine unplug() handler and take required action ? >>> I think offlining is a guests task along with recovering from >>> inability to offline (i.e. offline all + eject or restore original state). >>> QUEM does it's job by notifying guest what dimm it wants to remove >>> and removes it when guest asks it (at least in x86 world). >> >> In the case of pseries, the DIMM abstraction isn't really exposed to >> the guest, but rather the memory blocks we use to make the backing >> memdev memory available to the guest. 
During unplug, the guest >> completely releases these blocks back to QEMU, and if it can only >> release a subset of what's requested it does not attempt to recover. >> We can potentially change that behavior on the guest side, since >> partially-freed DIMMs aren't currently useful on the host-side... >> >> But, in the case of pseries, I wonder if it makes sense to maybe go >> ahead and MADV_DONTNEED the ranges backing these released blocks so the >> host can at least partially reclaim the memory from a partially >> unplugged DIMM? > It's a little bit confusing, one asked to remove device but it's still > there but not completely usable/available. > What will happen when user wants that memory plugged back? As far as I've understood MADV_DONTNEED, you can use the memory again at any time - just the previous contents will be gone, which is ok in this case since the guest previously marked this area as unavailable. Thomas ^ permalink raw reply [flat|nested] 33+ messages in thread
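The behaviour Thomas describes can be seen with a small standalone program, assuming an anonymous private mapping like the one a plain RAM backend would use: after MADV_DONTNEED the range stays mapped and writable, only its previous contents are gone.

/* Standalone demo of MADV_DONTNEED semantics on anonymous memory. */
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
    size_t len = 16 * 1024 * 1024;           /* stand-in for one released block */
    unsigned char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    assert(p != MAP_FAILED);

    memset(p, 0xab, len);                    /* the guest was using the block */

    int rc = madvise(p, len, MADV_DONTNEED); /* block released: host reclaims pages */
    assert(rc == 0);

    /* Mapping is still valid; reads now return zero-filled pages. */
    printf("after DONTNEED: 0x%02x\n", p[0]);   /* prints 0x00 */

    memset(p, 0xcd, len);                    /* and the range can be reused */
    printf("after reuse:    0x%02x\n", p[0]);   /* prints 0xcd */

    munmap(p, len);
    return 0;
}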
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-27 13:59 ` Thomas Huth @ 2016-04-27 14:34 ` Igor Mammedov 2016-04-27 19:07 ` Michael Roth 0 siblings, 1 reply; 33+ messages in thread From: Igor Mammedov @ 2016-04-27 14:34 UTC (permalink / raw) To: Thomas Huth Cc: Michael Roth, Bharata B Rao, David Gibson, qemu-devel, qemu-ppc, nfont On Wed, 27 Apr 2016 15:59:52 +0200 Thomas Huth <thuth@redhat.com> wrote: > On 27.04.2016 15:37, Igor Mammedov wrote: > > On Tue, 26 Apr 2016 16:03:37 -0500 > > Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > > > >> Quoting Igor Mammedov (2016-04-26 02:52:36) > >>> On Tue, 26 Apr 2016 10:39:23 +0530 > >>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > >>> > >>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > >>>>> On Wed, 16 Mar 2016 10:11:54 +0530 > >>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > >>>>> > >>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > >>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > >>>>>>>> Add support to hot remove pc-dimm memory devices. > >>>>>>>> > >>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > >>>>>>> > >>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > >>>>>>> > >>>>>>> Looks correct, but again, needs to wait on the PAPR change. > >>>>> [...] > >>>>>> > >>>>>> While we are here, I would also like to get some opinion on the real > >>>>>> need for memory unplug. Is there anything that memory unplug gives us > >>>>>> which memory ballooning (shrinking mem via ballooning) can't give ? > >>>>> Sure ballooning can complement memory hotplug but turning it on would > >>>>> effectively reduce hotplug to balloning as it would enable overcommit > >>>>> capability instead of hard partitioning pc-dimms provides. So one > >>>>> could just use ballooning only and not bother with hotplug at all. > >>>>> > >>>>> On the other hand memory hotplug/unplug (at least on x86) tries > >>>>> to model real hardware, thus removing need in paravirt ballooning > >>>>> solution in favor of native guest support. > >>>> > >>>> Thanks for your views. > >>>> > >>>>> > >>>>> PS: > >>>>> Guest wise, currently hot-unplug is not well supported in linux, > >>>>> i.e. it's not guarantied that guest will honor unplug request > >>>>> as it may pin dimm by using it as a non migratable memory. So > >>>>> there is something to work on guest side to make unplug more > >>>>> reliable/guarantied. > >>>> > >>>> In the above scenario where the guest doesn't allow removal of certain > >>>> parts of DIMM memory, what is the expected behaviour as far as QEMU > >>>> DIMM device is concerned ? I seem to be running into this situation > >>>> very often with PowerPC mem unplug where I am left with a DIMM device > >>>> that has only some memory blocks released. In this situation, I would like > >>>> to block further unplug requests on the same device, but QEMU seems > >>>> to allow more such unplug requests to come in via the monitor. So > >>>> qdev won't help me here ? Should I detect such condition from the > >>>> machine unplug() handler and take required action ? > >>> I think offlining is a guests task along with recovering from > >>> inability to offline (i.e. offline all + eject or restore original state). > >>> QUEM does it's job by notifying guest what dimm it wants to remove > >>> and removes it when guest asks it (at least in x86 world). 
> >> > >> In the case of pseries, the DIMM abstraction isn't really exposed to > >> the guest, but rather the memory blocks we use to make the backing > >> memdev memory available to the guest. During unplug, the guest > >> completely releases these blocks back to QEMU, and if it can only > >> release a subset of what's requested it does not attempt to recover. > >> We can potentially change that behavior on the guest side, since > >> partially-freed DIMMs aren't currently useful on the host-side... > >> > >> But, in the case of pseries, I wonder if it makes sense to maybe go > >> ahead and MADV_DONTNEED the ranges backing these released blocks so the > >> host can at least partially reclaim the memory from a partially > >> unplugged DIMM? > > It's a little bit confusing, one asked to remove device but it's still > > there but not completely usable/available. > > What will happen when user wants that memory plugged back? > > As far as I've understood MADV_DONTNEED, you can use the memory again at > any time - just the previous contents will be gone, which is ok in this > case since the guest previously marked this area as unavailable. If host gave returned memory to someone else there might not be enough resources to give it back (what would happen I can't tell may be VM will stall or just get exception). Anyhow I'd suggest ballooning if one needs partial unplug and fix physical unplug to unplug whole pc-dimm or none instead of turning pc-dimm device model into some hybrid with balloon device and making users/mgmt even more confused. > > Thomas > ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-27 14:34 ` Igor Mammedov @ 2016-04-27 19:07 ` Michael Roth 2016-04-28 7:55 ` Igor Mammedov 0 siblings, 1 reply; 33+ messages in thread From: Michael Roth @ 2016-04-27 19:07 UTC (permalink / raw) To: Igor Mammedov, Thomas Huth Cc: Bharata B Rao, David Gibson, qemu-devel, qemu-ppc, nfont Quoting Igor Mammedov (2016-04-27 09:34:53) > On Wed, 27 Apr 2016 15:59:52 +0200 > Thomas Huth <thuth@redhat.com> wrote: > > > On 27.04.2016 15:37, Igor Mammedov wrote: > > > On Tue, 26 Apr 2016 16:03:37 -0500 > > > Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > > > > > >> Quoting Igor Mammedov (2016-04-26 02:52:36) > > >>> On Tue, 26 Apr 2016 10:39:23 +0530 > > >>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > >>> > > >>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > >>>>> On Wed, 16 Mar 2016 10:11:54 +0530 > > >>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > >>>>> > > >>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > >>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > >>>>>>>> Add support to hot remove pc-dimm memory devices. > > >>>>>>>> > > >>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > >>>>>>> > > >>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > >>>>>>> > > >>>>>>> Looks correct, but again, needs to wait on the PAPR change. > > >>>>> [...] > > >>>>>> > > >>>>>> While we are here, I would also like to get some opinion on the real > > >>>>>> need for memory unplug. Is there anything that memory unplug gives us > > >>>>>> which memory ballooning (shrinking mem via ballooning) can't give ? > > >>>>> Sure ballooning can complement memory hotplug but turning it on would > > >>>>> effectively reduce hotplug to balloning as it would enable overcommit > > >>>>> capability instead of hard partitioning pc-dimms provides. So one > > >>>>> could just use ballooning only and not bother with hotplug at all. > > >>>>> > > >>>>> On the other hand memory hotplug/unplug (at least on x86) tries > > >>>>> to model real hardware, thus removing need in paravirt ballooning > > >>>>> solution in favor of native guest support. > > >>>> > > >>>> Thanks for your views. > > >>>> > > >>>>> > > >>>>> PS: > > >>>>> Guest wise, currently hot-unplug is not well supported in linux, > > >>>>> i.e. it's not guarantied that guest will honor unplug request > > >>>>> as it may pin dimm by using it as a non migratable memory. So > > >>>>> there is something to work on guest side to make unplug more > > >>>>> reliable/guarantied. > > >>>> > > >>>> In the above scenario where the guest doesn't allow removal of certain > > >>>> parts of DIMM memory, what is the expected behaviour as far as QEMU > > >>>> DIMM device is concerned ? I seem to be running into this situation > > >>>> very often with PowerPC mem unplug where I am left with a DIMM device > > >>>> that has only some memory blocks released. In this situation, I would like > > >>>> to block further unplug requests on the same device, but QEMU seems > > >>>> to allow more such unplug requests to come in via the monitor. So > > >>>> qdev won't help me here ? Should I detect such condition from the > > >>>> machine unplug() handler and take required action ? > > >>> I think offlining is a guests task along with recovering from > > >>> inability to offline (i.e. offline all + eject or restore original state). 
> > >>> QUEM does it's job by notifying guest what dimm it wants to remove > > >>> and removes it when guest asks it (at least in x86 world). > > >> > > >> In the case of pseries, the DIMM abstraction isn't really exposed to > > >> the guest, but rather the memory blocks we use to make the backing > > >> memdev memory available to the guest. During unplug, the guest > > >> completely releases these blocks back to QEMU, and if it can only > > >> release a subset of what's requested it does not attempt to recover. > > >> We can potentially change that behavior on the guest side, since > > >> partially-freed DIMMs aren't currently useful on the host-side... > > >> > > >> But, in the case of pseries, I wonder if it makes sense to maybe go > > >> ahead and MADV_DONTNEED the ranges backing these released blocks so the > > >> host can at least partially reclaim the memory from a partially > > >> unplugged DIMM? > > > It's a little bit confusing, one asked to remove device but it's still > > > there but not completely usable/available. > > > What will happen when user wants that memory plugged back? > > > > As far as I've understood MADV_DONTNEED, you can use the memory again at > > any time - just the previous contents will be gone, which is ok in this > > case since the guest previously marked this area as unavailable. > If host gave returned memory to someone else there might not be enough > resources to give it back (what would happen I can't tell may be VM will > stall or just get exception). It's not really an issue for pseries, since once the LMB is released it's totally gone as far as the guest is concerned, and there's no way to plug it back in via the still-present DIMM until removal completes after, say, reset time. But, either way, I agree if we'll intend to let the guest recover, it would be immediately upon being unable to satisfy the whole unplug and not some future time. > > Anyhow I'd suggest ballooning if one needs partial unplug and fix > physical unplug to unplug whole pc-dimm or none instead of > turning pc-dimm device model into some hybrid with balloon device > and making users/mgmt even more confused. That seems reasonable, I can see why recovering memory from partially removed DIMMs overlaps a lot with the ballooning use case... But I think that kind of leaves the question of how to make memory unplug useful in practice? In practice, memory unplug seems quite likely to fail in all-or-nothing scenarios. So if we expect all-or-nothing removal in the guest, then it seems like some work needs to be done with the balloon driver or elsewhere to provide the sort of specificity management would need to know to determine if a DIMM has become fully unpluggable, and let the guest make ballooning decisions that complement eventual DIMM unplug more effectively. > > > > > Thomas > > > ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-27 19:07 ` Michael Roth @ 2016-04-28 7:55 ` Igor Mammedov 0 siblings, 0 replies; 33+ messages in thread From: Igor Mammedov @ 2016-04-28 7:55 UTC (permalink / raw) To: Michael Roth Cc: Thomas Huth, nfont, David Gibson, qemu-ppc, qemu-devel, Bharata B Rao On Wed, 27 Apr 2016 14:07:10 -0500 Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > Quoting Igor Mammedov (2016-04-27 09:34:53) > > On Wed, 27 Apr 2016 15:59:52 +0200 > > Thomas Huth <thuth@redhat.com> wrote: > > > > > On 27.04.2016 15:37, Igor Mammedov wrote: > > > > On Tue, 26 Apr 2016 16:03:37 -0500 > > > > Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > > > > > > > >> Quoting Igor Mammedov (2016-04-26 02:52:36) > > > >>> On Tue, 26 Apr 2016 10:39:23 +0530 > > > >>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > >>> > > > >>>> On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > > >>>>> On Wed, 16 Mar 2016 10:11:54 +0530 > > > >>>>> Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > >>>>> > > > >>>>>> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > > >>>>>>> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > >>>>>>>> Add support to hot remove pc-dimm memory devices. > > > >>>>>>>> > > > >>>>>>>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > >>>>>>> > > > >>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > >>>>>>> > > > >>>>>>> Looks correct, but again, needs to wait on the PAPR change. > > > >>>>> [...] > > > >>>>>> > > > >>>>>> While we are here, I would also like to get some opinion on the real > > > >>>>>> need for memory unplug. Is there anything that memory unplug gives us > > > >>>>>> which memory ballooning (shrinking mem via ballooning) can't give ? > > > >>>>> Sure ballooning can complement memory hotplug but turning it on would > > > >>>>> effectively reduce hotplug to balloning as it would enable overcommit > > > >>>>> capability instead of hard partitioning pc-dimms provides. So one > > > >>>>> could just use ballooning only and not bother with hotplug at all. > > > >>>>> > > > >>>>> On the other hand memory hotplug/unplug (at least on x86) tries > > > >>>>> to model real hardware, thus removing need in paravirt ballooning > > > >>>>> solution in favor of native guest support. > > > >>>> > > > >>>> Thanks for your views. > > > >>>> > > > >>>>> > > > >>>>> PS: > > > >>>>> Guest wise, currently hot-unplug is not well supported in linux, > > > >>>>> i.e. it's not guarantied that guest will honor unplug request > > > >>>>> as it may pin dimm by using it as a non migratable memory. So > > > >>>>> there is something to work on guest side to make unplug more > > > >>>>> reliable/guarantied. > > > >>>> > > > >>>> In the above scenario where the guest doesn't allow removal of certain > > > >>>> parts of DIMM memory, what is the expected behaviour as far as QEMU > > > >>>> DIMM device is concerned ? I seem to be running into this situation > > > >>>> very often with PowerPC mem unplug where I am left with a DIMM device > > > >>>> that has only some memory blocks released. In this situation, I would like > > > >>>> to block further unplug requests on the same device, but QEMU seems > > > >>>> to allow more such unplug requests to come in via the monitor. So > > > >>>> qdev won't help me here ? Should I detect such condition from the > > > >>>> machine unplug() handler and take required action ? 
> > > >>> I think offlining is a guests task along with recovering from > > > >>> inability to offline (i.e. offline all + eject or restore original state). > > > >>> QUEM does it's job by notifying guest what dimm it wants to remove > > > >>> and removes it when guest asks it (at least in x86 world). > > > >> > > > >> In the case of pseries, the DIMM abstraction isn't really exposed to > > > >> the guest, but rather the memory blocks we use to make the backing > > > >> memdev memory available to the guest. During unplug, the guest > > > >> completely releases these blocks back to QEMU, and if it can only > > > >> release a subset of what's requested it does not attempt to recover. > > > >> We can potentially change that behavior on the guest side, since > > > >> partially-freed DIMMs aren't currently useful on the host-side... > > > >> > > > >> But, in the case of pseries, I wonder if it makes sense to maybe go > > > >> ahead and MADV_DONTNEED the ranges backing these released blocks so the > > > >> host can at least partially reclaim the memory from a partially > > > >> unplugged DIMM? > > > > It's a little bit confusing, one asked to remove device but it's still > > > > there but not completely usable/available. > > > > What will happen when user wants that memory plugged back? > > > > > > As far as I've understood MADV_DONTNEED, you can use the memory again at > > > any time - just the previous contents will be gone, which is ok in this > > > case since the guest previously marked this area as unavailable. > > If host gave returned memory to someone else there might not be enough > > resources to give it back (what would happen I can't tell may be VM will > > stall or just get exception). > > It's not really an issue for pseries, since once the LMB is released > it's totally gone as far as the guest is concerned, and there's no > way to plug it back in via the still-present DIMM until removal > completes after, say, reset time. > > But, either way, I agree if we'll intend to let the guest recover, it > would be immediately upon being unable to satisfy the whole unplug and > not some future time. > > > > > Anyhow I'd suggest ballooning if one needs partial unplug and fix > > physical unplug to unplug whole pc-dimm or none instead of > > turning pc-dimm device model into some hybrid with balloon device > > and making users/mgmt even more confused. > > That seems reasonable, I can see why recovering memory from partially > removed DIMMs overlaps a lot with the ballooning use case... > > But I think that kind of leaves the question of how to make memory > unplug useful in practice? In practice, memory unplug seems quite > likely to fail in all-or-nothing scenarios. So if we expect I'd work on improving not yet mature native unplug support on guest side making guarantied unplug available. That would benefit not only virt which would be the first big consumer but physical systems as well. Also it would allow drop ballooning support guest wise in favor of native solution. > all-or-nothing removal in the guest, then it seems like some work > needs to be done with the balloon driver or elsewhere to provide the > sort of specificity management would need to know to determine if a > DIMM has become fully unpluggable, and let the guest make ballooning > decisions that complement eventual DIMM unplug more effectively. Currently using ballooning effectively bars pc-dimm unplug as balloon driver pins all unused pages to itself. 
So using them together might need some work done on the ballooning side; I can't tell how much, though, as I'm not familiar with ballooning nor with how the kernel memory allocator works. > > > > > > > > Thomas > > > > > > > ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-27 13:37 ` Igor Mammedov 2016-04-27 13:59 ` Thomas Huth @ 2016-04-27 14:24 ` Bharata B Rao 2016-04-29 3:28 ` David Gibson 2 siblings, 0 replies; 33+ messages in thread From: Bharata B Rao @ 2016-04-27 14:24 UTC (permalink / raw) To: Igor Mammedov Cc: Michael Roth, David Gibson, thuth, qemu-devel, qemu-ppc, nfont On Wed, Apr 27, 2016 at 03:37:05PM +0200, Igor Mammedov wrote: > On Tue, 26 Apr 2016 16:03:37 -0500 > Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > > > Quoting Igor Mammedov (2016-04-26 02:52:36) > > > On Tue, 26 Apr 2016 10:39:23 +0530 > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > > > > On Wed, 16 Mar 2016 10:11:54 +0530 > > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > > > > > > Add support to hot remove pc-dimm memory devices. > > > > > > > > > > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > > > > > > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > > > > > > > > > > > Looks correct, but again, needs to wait on the PAPR change. > > > > > [...] > > > > > > > > > > > > While we are here, I would also like to get some opinion on the real > > > > > > need for memory unplug. Is there anything that memory unplug gives us > > > > > > which memory ballooning (shrinking mem via ballooning) can't give ? > > > > > Sure ballooning can complement memory hotplug but turning it on would > > > > > effectively reduce hotplug to balloning as it would enable overcommit > > > > > capability instead of hard partitioning pc-dimms provides. So one > > > > > could just use ballooning only and not bother with hotplug at all. > > > > > > > > > > On the other hand memory hotplug/unplug (at least on x86) tries > > > > > to model real hardware, thus removing need in paravirt ballooning > > > > > solution in favor of native guest support. > > > > > > > > Thanks for your views. > > > > > > > > > > > > > > PS: > > > > > Guest wise, currently hot-unplug is not well supported in linux, > > > > > i.e. it's not guarantied that guest will honor unplug request > > > > > as it may pin dimm by using it as a non migratable memory. So > > > > > there is something to work on guest side to make unplug more > > > > > reliable/guarantied. > > > > > > > > In the above scenario where the guest doesn't allow removal of certain > > > > parts of DIMM memory, what is the expected behaviour as far as QEMU > > > > DIMM device is concerned ? I seem to be running into this situation > > > > very often with PowerPC mem unplug where I am left with a DIMM device > > > > that has only some memory blocks released. In this situation, I would like > > > > to block further unplug requests on the same device, but QEMU seems > > > > to allow more such unplug requests to come in via the monitor. So > > > > qdev won't help me here ? Should I detect such condition from the > > > > machine unplug() handler and take required action ? > > > I think offlining is a guests task along with recovering from > > > inability to offline (i.e. offline all + eject or restore original state). > > > QUEM does it's job by notifying guest what dimm it wants to remove > > > and removes it when guest asks it (at least in x86 world). 
> > > > In the case of pseries, the DIMM abstraction isn't really exposed to > > the guest, but rather the memory blocks we use to make the backing > > memdev memory available to the guest. During unplug, the guest > > completely releases these blocks back to QEMU, and if it can only > > release a subset of what's requested it does not attempt to recover. > > We can potentially change that behavior on the guest side, since > > partially-freed DIMMs aren't currently useful on the host-side... > > > > But, in the case of pseries, I wonder if it makes sense to maybe go > > ahead and MADV_DONTNEED the ranges backing these released blocks so the > > host can at least partially reclaim the memory from a partially > > unplugged DIMM? > It's a little bit confusing, one asked to remove device but it's still > there but not completely usable/available. > What will happen when user wants that memory plugged back? In the current patchset, the DIMM device still persists since some blocks belonging to it aren't released yet. So it is not possible to plug it back again. Regards, Bharata. ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-27 13:37 ` Igor Mammedov 2016-04-27 13:59 ` Thomas Huth 2016-04-27 14:24 ` Bharata B Rao @ 2016-04-29 3:28 ` David Gibson 2016-04-29 8:42 ` Igor Mammedov 2 siblings, 1 reply; 33+ messages in thread From: David Gibson @ 2016-04-29 3:28 UTC (permalink / raw) To: Igor Mammedov Cc: Michael Roth, Bharata B Rao, thuth, qemu-devel, qemu-ppc, nfont [-- Attachment #1: Type: text/plain, Size: 5099 bytes --] On Wed, Apr 27, 2016 at 03:37:05PM +0200, Igor Mammedov wrote: > On Tue, 26 Apr 2016 16:03:37 -0500 > Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > > > Quoting Igor Mammedov (2016-04-26 02:52:36) > > > On Tue, 26 Apr 2016 10:39:23 +0530 > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > > > > On Wed, 16 Mar 2016 10:11:54 +0530 > > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > > > > > > Add support to hot remove pc-dimm memory devices. > > > > > > > > > > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > > > > > > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > > > > > > > > > > > Looks correct, but again, needs to wait on the PAPR change. > > > > > [...] > > > > > > > > > > > > While we are here, I would also like to get some opinion on the real > > > > > > need for memory unplug. Is there anything that memory unplug gives us > > > > > > which memory ballooning (shrinking mem via ballooning) can't give ? > > > > > Sure ballooning can complement memory hotplug but turning it on would > > > > > effectively reduce hotplug to balloning as it would enable overcommit > > > > > capability instead of hard partitioning pc-dimms provides. So one > > > > > could just use ballooning only and not bother with hotplug at all. > > > > > > > > > > On the other hand memory hotplug/unplug (at least on x86) tries > > > > > to model real hardware, thus removing need in paravirt ballooning > > > > > solution in favor of native guest support. > > > > > > > > Thanks for your views. > > > > > > > > > > > > > > PS: > > > > > Guest wise, currently hot-unplug is not well supported in linux, > > > > > i.e. it's not guarantied that guest will honor unplug request > > > > > as it may pin dimm by using it as a non migratable memory. So > > > > > there is something to work on guest side to make unplug more > > > > > reliable/guarantied. > > > > > > > > In the above scenario where the guest doesn't allow removal of certain > > > > parts of DIMM memory, what is the expected behaviour as far as QEMU > > > > DIMM device is concerned ? I seem to be running into this situation > > > > very often with PowerPC mem unplug where I am left with a DIMM device > > > > that has only some memory blocks released. In this situation, I would like > > > > to block further unplug requests on the same device, but QEMU seems > > > > to allow more such unplug requests to come in via the monitor. So > > > > qdev won't help me here ? Should I detect such condition from the > > > > machine unplug() handler and take required action ? > > > I think offlining is a guests task along with recovering from > > > inability to offline (i.e. offline all + eject or restore original state). 
> > > QUEM does it's job by notifying guest what dimm it wants to remove > > > and removes it when guest asks it (at least in x86 world). > > > > In the case of pseries, the DIMM abstraction isn't really exposed to > > the guest, but rather the memory blocks we use to make the backing > > memdev memory available to the guest. During unplug, the guest > > completely releases these blocks back to QEMU, and if it can only > > release a subset of what's requested it does not attempt to recover. > > We can potentially change that behavior on the guest side, since > > partially-freed DIMMs aren't currently useful on the host-side... > > > > But, in the case of pseries, I wonder if it makes sense to maybe go > > ahead and MADV_DONTNEED the ranges backing these released blocks so the > > host can at least partially reclaim the memory from a partially > > unplugged DIMM? > It's a little bit confusing, one asked to remove device but it's still > there but not completely usable/available. > What will happen when user wants that memory plugged back? > > It looks like reinventing ballooning, > maybe it's would be better to disable unplug and use ballooning > to release some memory, until guest is ready to unplug all or none of > requested blocks? I see your point, and it gives me an idea. I think it might be possible to connect qemu's ballooning backend to the PAPR LMB mechanism - and in fact that might be a better match than the DIMM backend for it. The common way of removing memory with PAPR is for the host to just ask for an amount and the guest chooses what to give up, which is indeed more like ballooning than physical hotplug. How we integrate that with true memory hot (in)plug, which will need the DIMM mechanism, I'm not quite sure. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 819 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-29 3:28 ` David Gibson @ 2016-04-29 8:42 ` Igor Mammedov 0 siblings, 0 replies; 33+ messages in thread From: Igor Mammedov @ 2016-04-29 8:42 UTC (permalink / raw) To: David Gibson Cc: Michael Roth, Bharata B Rao, thuth, qemu-devel, qemu-ppc, nfont On Fri, 29 Apr 2016 13:28:50 +1000 David Gibson <david@gibson.dropbear.id.au> wrote: > On Wed, Apr 27, 2016 at 03:37:05PM +0200, Igor Mammedov wrote: > > On Tue, 26 Apr 2016 16:03:37 -0500 > > Michael Roth <mdroth@linux.vnet.ibm.com> wrote: > > > > > Quoting Igor Mammedov (2016-04-26 02:52:36) > > > > On Tue, 26 Apr 2016 10:39:23 +0530 > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > > > > > On Wed, 16 Mar 2016 10:11:54 +0530 > > > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > > > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > > > > > > > Add support to hot remove pc-dimm memory devices. > > > > > > > > > > > > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > > > > > > > > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > > > > > > > > > > > > > Looks correct, but again, needs to wait on the PAPR change. > > > > > > [...] > > > > > > > > > > > > > > While we are here, I would also like to get some opinion on the real > > > > > > > need for memory unplug. Is there anything that memory unplug gives us > > > > > > > which memory ballooning (shrinking mem via ballooning) can't give ? > > > > > > Sure ballooning can complement memory hotplug but turning it on would > > > > > > effectively reduce hotplug to balloning as it would enable overcommit > > > > > > capability instead of hard partitioning pc-dimms provides. So one > > > > > > could just use ballooning only and not bother with hotplug at all. > > > > > > > > > > > > On the other hand memory hotplug/unplug (at least on x86) tries > > > > > > to model real hardware, thus removing need in paravirt ballooning > > > > > > solution in favor of native guest support. > > > > > > > > > > Thanks for your views. > > > > > > > > > > > > > > > > > PS: > > > > > > Guest wise, currently hot-unplug is not well supported in linux, > > > > > > i.e. it's not guarantied that guest will honor unplug request > > > > > > as it may pin dimm by using it as a non migratable memory. So > > > > > > there is something to work on guest side to make unplug more > > > > > > reliable/guarantied. > > > > > > > > > > In the above scenario where the guest doesn't allow removal of certain > > > > > parts of DIMM memory, what is the expected behaviour as far as QEMU > > > > > DIMM device is concerned ? I seem to be running into this situation > > > > > very often with PowerPC mem unplug where I am left with a DIMM device > > > > > that has only some memory blocks released. In this situation, I would like > > > > > to block further unplug requests on the same device, but QEMU seems > > > > > to allow more such unplug requests to come in via the monitor. So > > > > > qdev won't help me here ? Should I detect such condition from the > > > > > machine unplug() handler and take required action ? > > > > I think offlining is a guests task along with recovering from > > > > inability to offline (i.e. offline all + eject or restore original state). 
> > > > QUEM does it's job by notifying guest what dimm it wants to remove > > > > and removes it when guest asks it (at least in x86 world). > > > > > > In the case of pseries, the DIMM abstraction isn't really exposed to > > > the guest, but rather the memory blocks we use to make the backing > > > memdev memory available to the guest. During unplug, the guest > > > completely releases these blocks back to QEMU, and if it can only > > > release a subset of what's requested it does not attempt to recover. > > > We can potentially change that behavior on the guest side, since > > > partially-freed DIMMs aren't currently useful on the host-side... > > > > > > But, in the case of pseries, I wonder if it makes sense to maybe go > > > ahead and MADV_DONTNEED the ranges backing these released blocks so the > > > host can at least partially reclaim the memory from a partially > > > unplugged DIMM? > > It's a little bit confusing, one asked to remove device but it's still > > there but not completely usable/available. > > What will happen when user wants that memory plugged back? > > > > It looks like reinventing ballooning, > > maybe it's would be better to disable unplug and use ballooning > > to release some memory, until guest is ready to unplug all or none of > > requested blocks? > > I see your point, and it gives me an idea. > > I think it might be possible to connect qemu's ballooning backend, to > the PAPR LMB mechanism - and in fact that might be a better match than > the DIMM backend for it. The common way of removing memory with PAPR > is for the host to just ask for an amount and the guest choses what to > give up, which is indeed more like balloning than physical hotplug. looks like ballooning case, one thing to consider here is what PAPR expects when it adds memory to guest? It's probably possible to fail request in QEMU gracefully if it can't get relinquished memory back. > How we intergrate that with true memory hot (in)plug which will need > the DIMM mechanism I'm not quite sure. Me neither, so far I was thinking about replacing paravirt ballooning with native mem hot(un)plug and dropping balloon support (which is sort of an orphan without active maintainer). But native hotplug by it's nature isn't so fine gained as ballooning, so ones who need to give up memory in chunks less that DIMM size would have to use balloon driver/device. ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-26 21:03 ` Michael Roth 2016-04-27 6:54 ` Thomas Huth 2016-04-27 13:37 ` Igor Mammedov @ 2016-04-29 3:24 ` David Gibson 2016-04-29 6:45 ` Thomas Huth 2 siblings, 1 reply; 33+ messages in thread From: David Gibson @ 2016-04-29 3:24 UTC (permalink / raw) To: Michael Roth Cc: Igor Mammedov, Bharata B Rao, thuth, qemu-devel, qemu-ppc, nfont [-- Attachment #1: Type: text/plain, Size: 4160 bytes --] On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote: > Quoting Igor Mammedov (2016-04-26 02:52:36) > > On Tue, 26 Apr 2016 10:39:23 +0530 > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > On Mon, Apr 25, 2016 at 11:20:50AM +0200, Igor Mammedov wrote: > > > > On Wed, 16 Mar 2016 10:11:54 +0530 > > > > Bharata B Rao <bharata@linux.vnet.ibm.com> wrote: > > > > > > > > > On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote: > > > > > > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote: > > > > > > > Add support to hot remove pc-dimm memory devices. > > > > > > > > > > > > > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > > > > > > > > > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > > > > > > > > > Looks correct, but again, needs to wait on the PAPR change. > > > > [...] > > > > > > > > > > While we are here, I would also like to get some opinion on the real > > > > > need for memory unplug. Is there anything that memory unplug gives us > > > > > which memory ballooning (shrinking mem via ballooning) can't give ? > > > > Sure ballooning can complement memory hotplug but turning it on would > > > > effectively reduce hotplug to balloning as it would enable overcommit > > > > capability instead of hard partitioning pc-dimms provides. So one > > > > could just use ballooning only and not bother with hotplug at all. > > > > > > > > On the other hand memory hotplug/unplug (at least on x86) tries > > > > to model real hardware, thus removing need in paravirt ballooning > > > > solution in favor of native guest support. > > > > > > Thanks for your views. > > > > > > > > > > > PS: > > > > Guest wise, currently hot-unplug is not well supported in linux, > > > > i.e. it's not guarantied that guest will honor unplug request > > > > as it may pin dimm by using it as a non migratable memory. So > > > > there is something to work on guest side to make unplug more > > > > reliable/guarantied. > > > > > > In the above scenario where the guest doesn't allow removal of certain > > > parts of DIMM memory, what is the expected behaviour as far as QEMU > > > DIMM device is concerned ? I seem to be running into this situation > > > very often with PowerPC mem unplug where I am left with a DIMM device > > > that has only some memory blocks released. In this situation, I would like > > > to block further unplug requests on the same device, but QEMU seems > > > to allow more such unplug requests to come in via the monitor. So > > > qdev won't help me here ? Should I detect such condition from the > > > machine unplug() handler and take required action ? > > I think offlining is a guests task along with recovering from > > inability to offline (i.e. offline all + eject or restore original state). > > QUEM does it's job by notifying guest what dimm it wants to remove > > and removes it when guest asks it (at least in x86 world). 
> > In the case of pseries, the DIMM abstraction isn't really exposed to > the guest, but rather the memory blocks we use to make the backing > memdev memory available to the guest. During unplug, the guest > completely releases these blocks back to QEMU, and if it can only > release a subset of what's requested it does not attempt to recover. > We can potentially change that behavior on the guest side, since > partially-freed DIMMs aren't currently useful on the host-side... > > But, in the case of pseries, I wonder if it makes sense to maybe go > ahead and MADV_DONTNEED the ranges backing these released blocks so the > host can at least partially reclaim the memory from a partially > unplugged DIMM? Urgh.. I can see the benefit, but I'm a bit uneasy about making the DIMM semantics different in this way on Power. I'm not sure shoehorning the PAPR DR memory mechanism into the qemu DIMM model was a good idea after all. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 819 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-29 3:24 ` David Gibson @ 2016-04-29 6:45 ` Thomas Huth 2016-04-29 6:59 ` Bharata B Rao 2016-04-29 10:11 ` David Gibson 0 siblings, 2 replies; 33+ messages in thread From: Thomas Huth @ 2016-04-29 6:45 UTC (permalink / raw) To: David Gibson, Michael Roth Cc: Igor Mammedov, Bharata B Rao, qemu-devel, qemu-ppc, nfont [-- Attachment #1: Type: text/plain, Size: 1308 bytes --] On 29.04.2016 05:24, David Gibson wrote: > On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote: ... >> In the case of pseries, the DIMM abstraction isn't really exposed to >> the guest, but rather the memory blocks we use to make the backing >> memdev memory available to the guest. During unplug, the guest >> completely releases these blocks back to QEMU, and if it can only >> release a subset of what's requested it does not attempt to recover. >> We can potentially change that behavior on the guest side, since >> partially-freed DIMMs aren't currently useful on the host-side... >> >> But, in the case of pseries, I wonder if it makes sense to maybe go >> ahead and MADV_DONTNEED the ranges backing these released blocks so the >> host can at least partially reclaim the memory from a partially >> unplugged DIMM? > > Urgh.. I can see the benefit, but I'm a bit uneasy about making the > DIMM semantics different in this way on Power. > > I'm shoehorning the PAPR DR memory mechanism into the qemu DIMM model > was a good idea after all. Ignorant question (sorry, I really don't have much experience yet here): Could we maybe align the size of the LMBs with the size of the DIMMs? E.g. make the LMBs bigger or the DIMMs smaller, so that they match? Thomas [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 836 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support 2016-04-29 6:45 ` Thomas Huth @ 2016-04-29 6:59 ` Bharata B Rao 2016-04-29 8:22 ` Thomas Huth 2016-04-29 10:11 ` David Gibson 1 sibling, 1 reply; 33+ messages in thread From: Bharata B Rao @ 2016-04-29 6:59 UTC (permalink / raw) To: Thomas Huth Cc: David Gibson, Michael Roth, Igor Mammedov, qemu-devel, qemu-ppc, nfont On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote: > On 29.04.2016 05:24, David Gibson wrote: > > On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote: > ... > >> In the case of pseries, the DIMM abstraction isn't really exposed to > >> the guest, but rather the memory blocks we use to make the backing > >> memdev memory available to the guest. During unplug, the guest > >> completely releases these blocks back to QEMU, and if it can only > >> release a subset of what's requested it does not attempt to recover. > >> We can potentially change that behavior on the guest side, since > >> partially-freed DIMMs aren't currently useful on the host-side... > >> > >> But, in the case of pseries, I wonder if it makes sense to maybe go > >> ahead and MADV_DONTNEED the ranges backing these released blocks so the > >> host can at least partially reclaim the memory from a partially > >> unplugged DIMM? > > > > Urgh.. I can see the benefit, but I'm a bit uneasy about making the > > DIMM semantics different in this way on Power. > > > > I'm shoehorning the PAPR DR memory mechanism into the qemu DIMM model > > was a good idea after all. > > Ignorant question (sorry, I really don't have much experience yet here): > Could we maybe align the size of the LMBs with the size of the DIMMs? > E.g. make the LMBs bigger or the DIMMs smaller, so that they match? Should work, but the question is what should be the right size so that we have good granularity of hotplug but also not run out of mem slots thereby limiting us on the maxmem. I remember you changed the memslots to 512 in KVM, but we are yet to move up from 32 in QEMU for sPAPR though. Regards, Bharata. ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  6:59 ` Bharata B Rao
@ 2016-04-29  8:22   ` Thomas Huth
  2016-04-29  8:30     ` Igor Mammedov
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Huth @ 2016-04-29  8:22 UTC (permalink / raw)
  To: bharata
  Cc: David Gibson, Michael Roth, Igor Mammedov, qemu-devel, qemu-ppc, nfont

On 29.04.2016 08:59, Bharata B Rao wrote:
> On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote:
[...]
>> Ignorant question (sorry, I really don't have much experience yet here):
>> Could we maybe align the size of the LMBs with the size of the DIMMs?
>> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?
>
> Should work, but the question is what should be the right size so that
> we have good granularity of hotplug but also not run out of mem slots
> thereby limiting us on the maxmem. I remember you changed the memslots
> to 512 in KVM, but we are yet to move up from 32 in QEMU for sPAPR though.

Half of the slots should be "reserved" for PCI and other stuff, so we
could use 256 for memory - that way we would also be on the same level
as x86 which also uses 256 memslots here, as far as I know.

Anyway, couldn't we simply calculate the SPAPR_MEMORY_BLOCK_SIZE
dynamically, according to the maxmem and slot values that the user
specified? So that SPAPR_MEMORY_BLOCK_SIZE simply would match the DIMM
size? ... or is there some constraint that I've missed so that
SPAPR_MEMORY_BLOCK_SIZE has to be a compile-time #defined value?

 Thomas
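(A rough, hypothetical sketch of the dynamic sizing idea above; this is
not QEMU code. The helper name is invented, and rounding down to a power
of two with a 256 MiB floor is an assumption made purely for alignment:)

#include <stdint.h>

#define MIN_MEMORY_BLOCK_SIZE (256ULL << 20)   /* today's fixed 256 MiB LMB */

/* Hypothetical: derive the LMB/block size from the user-supplied maxmem
 * and slots values, so that one "average" DIMM (maxmem/slots) spans one
 * block, or only a few, instead of many 256 MiB LMBs. */
static uint64_t spapr_dynamic_block_size(uint64_t maxmem_bytes, uint32_t slots)
{
    uint64_t target, size = MIN_MEMORY_BLOCK_SIZE;

    if (!slots) {
        return size;
    }
    target = maxmem_bytes / slots;

    /* largest power of two <= target, but never below 256 MiB */
    while (size * 2 <= target) {
        size *= 2;
    }
    return size;
}

With the example given further down the thread (slots=32, maxmem=32G)
this would yield a 1 GiB block size; the catch, raised in the reply
below, is that the result is only known at start-up and management would
somehow have to discover it.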
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  8:22 ` Thomas Huth
@ 2016-04-29  8:30   ` Igor Mammedov
  2016-04-29 11:01     ` Thomas Huth
  0 siblings, 1 reply; 33+ messages in thread
From: Igor Mammedov @ 2016-04-29  8:30 UTC (permalink / raw)
  To: Thomas Huth
  Cc: bharata, David Gibson, Michael Roth, qemu-devel, qemu-ppc, nfont

On Fri, 29 Apr 2016 10:22:03 +0200
Thomas Huth <thuth@redhat.com> wrote:

> On 29.04.2016 08:59, Bharata B Rao wrote:
> >> Ignorant question (sorry, I really don't have much experience yet here):
> >> Could we maybe align the size of the LMBs with the size of the DIMMs?
> >> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?
> >
> > Should work, but the question is what should be the right size so that
> > we have good granularity of hotplug but also not run out of mem slots
> > thereby limiting us on the maxmem. [...]
>
> Half of the slots should be "reserved" for PCI and other stuff, so we
> could use 256 for memory - that way we would also be on the same level
> as x86 which also uses 256 memslots here, as far as I know.
>
> Anyway, couldn't we simply calculate the SPAPR_MEMORY_BLOCK_SIZE
> dynamically, according to the maxmem and slot values that the user
> specified? So that SPAPR_MEMORY_BLOCK_SIZE simply would match the DIMM
> size? ... or is there some constraint that I've missed so that
> SPAPR_MEMORY_BLOCK_SIZE has to be a compile-time #defined value?

If you do that, then the possible DIMM size would have to be decided at
startup and fixed. If a DIMM of the wrong size is plugged, the machine
should fail the hotplug request.
The question is how mgmt will know the fixed DIMM size that sPAPR just
calculated?
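(A hypothetical sketch of the "fail the hotplug request" part, assuming
the block size has been fixed at start-up; the function name and the
message are invented for illustration, only error_setg() is QEMU's
existing error API:)

#include <inttypes.h>
#include "qapi/error.h"

/* Hypothetical pre-plug check: with a machine-wide block size fixed at
 * start-up, refuse DIMMs that are smaller than it or not a multiple of it. */
static void spapr_check_dimm_size(uint64_t dimm_size, uint64_t block_size,
                                  Error **errp)
{
    if (dimm_size < block_size || dimm_size % block_size) {
        error_setg(errp, "DIMM size %" PRIu64 " is not a multiple of the "
                   "machine memory block size %" PRIu64,
                   dimm_size, block_size);
    }
}

This still leaves the open question above of how management learns the
computed size in the first place; it would have to be exposed somewhere,
e.g. as a machine property.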
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  8:30 ` Igor Mammedov
@ 2016-04-29 11:01   ` Thomas Huth
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Huth @ 2016-04-29 11:01 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: bharata, David Gibson, Michael Roth, qemu-devel, qemu-ppc, nfont

On 29.04.2016 10:30, Igor Mammedov wrote:
> On Fri, 29 Apr 2016 10:22:03 +0200
> Thomas Huth <thuth@redhat.com> wrote:
>
>> On 29.04.2016 08:59, Bharata B Rao wrote:
[...]
>> Anyway, couldn't we simply calculate the SPAPR_MEMORY_BLOCK_SIZE
>> dynamically, according to the maxmem and slot values that the user
>> specified? So that SPAPR_MEMORY_BLOCK_SIZE simply would match the DIMM
>> size? ... or is there some constraint that I've missed so that
>> SPAPR_MEMORY_BLOCK_SIZE has to be a compile-time #defined value?
>
> If you do that, then the possible DIMM size would have to be decided at
> startup and fixed. If a DIMM of the wrong size is plugged, the machine
> should fail the hotplug request.
> The question is how mgmt will know the fixed DIMM size that sPAPR just
> calculated?

Ok, sorry, I somehow had that bad idea in mind that all DIMMs for
hot-plugging should have the same size. That's of course not the case
if we model something similar to DIMM plugging on real hardware. So
please never mind, it was just a wrong assumption on my side.

OTOH, it maybe also does not make sense to keep the LMB size always at
such a small, fixed value. Imagine the user specifies slots=32 and
maxmem=32G ... maybe we should then disallow plugging DIMMs that are
smaller than 1G, so we could use an LMB size of 1G in this case?
(plugging DIMMs of a different size > 1G would then still be allowed,
too, of course)

 Thomas
* Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support
  2016-04-29  6:45 ` Thomas Huth
  2016-04-29  6:59 ` Bharata B Rao
@ 2016-04-29 10:11   ` David Gibson
  1 sibling, 0 replies; 33+ messages in thread
From: David Gibson @ 2016-04-29 10:11 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Michael Roth, Igor Mammedov, Bharata B Rao, qemu-devel, qemu-ppc, nfont

On Fri, Apr 29, 2016 at 08:45:37AM +0200, Thomas Huth wrote:
> On 29.04.2016 05:24, David Gibson wrote:
> > On Tue, Apr 26, 2016 at 04:03:37PM -0500, Michael Roth wrote:
[...]
> Ignorant question (sorry, I really don't have much experience yet here):
> Could we maybe align the size of the LMBs with the size of the DIMMs?
> E.g. make the LMBs bigger or the DIMMs smaller, so that they match?

Um... maybe.  DIMMs don't have to all be the same size, whereas LMBs
do, but maybe we can work around that.

In theory we could increase the LMB size, but I'd be pretty worried
that guests might not cope with a setup so different from what PowerVM
gives us.

Decreasing the DIMMs to LMB size should certainly work in theory, but
could be very painful from the user point of view to have to add a
memory block for every 256MiB.

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson
* Re: [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support
  2016-03-15  4:38 [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support Bharata B Rao
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type Bharata B Rao
  2016-03-15  4:38 ` [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support Bharata B Rao
@ 2016-05-27 15:48   ` Thomas Huth
  2016-05-27 16:32     ` Michael Roth
  2 siblings, 1 reply; 33+ messages in thread
From: Thomas Huth @ 2016-05-27 15:48 UTC (permalink / raw)
  To: Bharata B Rao, qemu-devel; +Cc: mdroth, qemu-ppc, nfont, imammedo, david

Hi Bharata,

On 15.03.2016 05:38, Bharata B Rao wrote:
> This patchset adds memory hot removal support for PowerPC sPAPR.
> This new version switches to using the proposed "count-indexed" type of
> hotplug identifier which allows to hot remove a number of LMBs starting
> with a given DRC index.
>
> This count-indexed hotplug identifier isn't yet part of PAPR.

Just for clarification / my understanding: That means we also need a
modified guest to support this new interface? If yes, did you post such
patches somewhere else already, too?

 Thomas
* Re: [Qemu-devel] [RFC PATCH v2 0/2] spapr: Memory hot-unplug support
  2016-05-27 15:48 ` [Qemu-devel] [RFC PATCH v2 0/2] " Thomas Huth
@ 2016-05-27 16:32   ` Michael Roth
  0 siblings, 0 replies; 33+ messages in thread
From: Michael Roth @ 2016-05-27 16:32 UTC (permalink / raw)
  To: Thomas Huth, Bharata B Rao, qemu-devel; +Cc: qemu-ppc, nfont, imammedo, david

Quoting Thomas Huth (2016-05-27 10:48:45)
> Hi Bharata,
>
> On 15.03.2016 05:38, Bharata B Rao wrote:
> > This patchset adds memory hot removal support for PowerPC sPAPR.
> > This new version switches to using the proposed "count-indexed" type of
> > hotplug identifier which allows to hot remove a number of LMBs starting
> > with a given DRC index.
> >
> > This count-indexed hotplug identifier isn't yet part of PAPR.
>
> Just for clarification / my understanding: That means we also need a
> modified guest to support this new interface? If yes, did you post such
> patches somewhere else already, too?

No patches posted yet, but hopefully soon. These bits will likely be
added as part of an effort that moves all memory hotplug/unplug into
guest kernel instead of relying on drmgr. Most of the bits for
in-kernel memory hotplug are already upstream, but there's a number of
other requirements in the spec update (like a new hotplug
interrupt/queue instead of re-using EPOW) that need to be addressed as
part of the switchover.