From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: "Liu, Jinsong" <jinsong.liu@intel.com>
Cc: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
Konrad Rzeszutek Wilk <konrad@kernel.org>
Subject: Re: [PATCH V2 2/2] Xen ACPI memory hotplug
Date: Fri, 11 Jan 2013 13:21:24 -0500 [thread overview]
Message-ID: <20130111182124.GB28685@phenom.dumpdata.com> (raw)
In-Reply-To: <DE8DF0795D48FD4CA783C40EC82923353BB10D@SHSMSX101.ccr.corp.intel.com>
On Wed, Jan 09, 2013 at 07:42:52AM +0000, Liu, Jinsong wrote:
> This patch implements real Xen acpi memory hotplug driver as module.
> When loaded, it replaces Xen stub driver.
>
> When an acpi memory device hotadd event occurs, it notifies OS and
> invokes notification callback, adding related memory device and parsing
> memory information, finally hypercall to xen hypervisor to add memory.
>
> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
> ---
> drivers/xen/Kconfig | 11 +
> drivers/xen/Makefile | 1 +
> drivers/xen/xen-acpi-memhotplug.c | 487 +++++++++++++++++++++++++++++++++++++
> include/xen/interface/platform.h | 13 +-
> 4 files changed, 508 insertions(+), 4 deletions(-)
> create mode 100644 drivers/xen/xen-acpi-memhotplug.c
>
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 2986de9..b8cf899 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -191,6 +191,17 @@ config XEN_STUB
>
> To enable Xen features like cpu and memory hotplug, select Y here.
>
> +config XEN_ACPI_HOTPLUG_MEMORY
> + tristate "Xen ACPI memory hotplug"
> + depends on XEN_STUB && ACPI
> + default n
> + help
> + This is Xen ACPI memory hotplug.
> +
> + Currently Xen only support ACPI memory hot-add. If you want
> + to hot-add memory at runtime (the hot-added memory cannot be
> + removed until machine stop), select Y/M here, otherwise select N.
> +
> config XEN_ACPI_PROCESSOR
> tristate "Xen ACPI processor"
> depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index b63edd8..1605f59 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -31,6 +31,7 @@ obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o
> obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
> obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
> obj-$(CONFIG_XEN_STUB) += xen-stub.o
> +obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
> obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
> xen-evtchn-y := evtchn.o
> xen-gntdev-y := gntdev.o
> diff --git a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c
> new file mode 100644
> index 0000000..d207fec
> --- /dev/null
> +++ b/drivers/xen/xen-acpi-memhotplug.c
> @@ -0,0 +1,487 @@
> +/*
> + * Copyright (C) 2012 Intel Corporation
> + * Author: Liu Jinsong <jinsong.liu@intel.com>
> + * Author: Jiang Yunhong <yunhong.jiang@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or (at
> + * your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
> + * NON INFRINGEMENT. See the GNU General Public License for more
> + * details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/types.h>
> +#include <linux/acpi.h>
> +#include <acpi/acpi_drivers.h>
> +#include <xen/acpi.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypercall.h>
> +
> +#define PREFIX "ACPI:xen_memory_hotplug:"
> +
> +struct acpi_memory_info {
> + struct list_head list;
> + u64 start_addr; /* Memory Range start physical addr */
> + u64 length; /* Memory Range length */
> + unsigned short caching; /* memory cache attribute */
> + unsigned short write_protect; /* memory read/write attribute */
> + /* copied from buffer getting from _CRS */
> + unsigned int enabled:1;
> +};
> +
> +struct acpi_memory_device {
> + struct acpi_device *device;
> + struct list_head res_list;
> +};
> +
> +static bool acpi_hotmem_initialized __read_mostly;
> +
> +static int xen_hotadd_memory(int pxm, struct acpi_memory_info *info)
> +{
> + struct xen_platform_op op;
> +
> + op.cmd = XENPF_mem_hotadd;
> + op.u.mem_add.spfn = info->start_addr >> PAGE_SHIFT;
> + op.u.mem_add.epfn = (info->start_addr + info->length) >> PAGE_SHIFT;
> + op.u.mem_add.pxm = pxm;
> +
> + return HYPERVISOR_dom0_op(&op);
Don't want to print out the erorrs if it failed?
Say do:
int rc;
..
rc = HYPERVSIOR_dom0_op(&op);
if (rc)
pr_error(PFX "Hotplug Memory Add failed on %lx->%lx, _PXM: %d, error: %d\n",
...
?
> +}
> +
> +static int xen_acpi_get_pxm(acpi_handle h)
> +{
> + unsigned long long pxm;
> + acpi_status status;
> + acpi_handle handle;
> + acpi_handle phandle = h;
> +
> + do {
> + handle = phandle;
> + status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
> + if (ACPI_SUCCESS(status))
> + return pxm;
> + status = acpi_get_parent(handle, &phandle);
> + } while (ACPI_SUCCESS(status));
> + return -1;
Ugh. Why not a normal -Exxx type erorr? Say -ENXIO ?
> +}
> +
> +static int xen_acpi_memory_enable_device(struct acpi_memory_device *mem_device)
> +{
> + int pxm, result;
> + int num_enabled = 0;
> + struct acpi_memory_info *info;
> +
> + if (!mem_device)
> + return -EINVAL;
> +
> + pxm = xen_acpi_get_pxm(mem_device->device->handle);
> + if (pxm < 0)
> + return -EINVAL;
> +
> + list_for_each_entry(info, &mem_device->res_list, list) {
> + if (info->enabled) { /* just sanity check...*/
> + num_enabled++;
> + continue;
> + }
> +
> + if (!info->length)
> + continue;
> +
> + result = xen_hotadd_memory(pxm, info);
> + if (result)
> + continue;
> + info->enabled = 1;
> + num_enabled++;
> + }
> +
> + if (!num_enabled)
> + return -EINVAL;
Is that the correct error to be returned? I thought
-ENODEV would be more appropiate?
> +
> + return 0;
> +}
> +
> +static acpi_status
> +acpi_memory_get_resource(struct acpi_resource *resource, void *context)
> +{
> + struct acpi_memory_device *mem_device = context;
> + struct acpi_resource_address64 address64;
> + struct acpi_memory_info *info, *new;
> + acpi_status status;
> +
> + status = acpi_resource_to_address64(resource, &address64);
> + if (ACPI_FAILURE(status) ||
> + (address64.resource_type != ACPI_MEMORY_RANGE))
> + return AE_OK;
> +
> + list_for_each_entry(info, &mem_device->res_list, list) {
> + if ((info->caching == address64.info.mem.caching) &&
> + (info->write_protect == address64.info.mem.write_protect) &&
> + (info->start_addr + info->length == address64.minimum)) {
> + info->length += address64.address_length;
> + return AE_OK;
> + }
> + }
> +
> + new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL);
> + if (!new)
> + return AE_ERROR;
> +
> + INIT_LIST_HEAD(&new->list);
> + new->caching = address64.info.mem.caching;
> + new->write_protect = address64.info.mem.write_protect;
> + new->start_addr = address64.minimum;
> + new->length = address64.address_length;
> + list_add_tail(&new->list, &mem_device->res_list);
> +
> + return AE_OK;
> +}
> +
> +static int
> +acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
> +{
> + acpi_status status;
> + struct acpi_memory_info *info, *n;
> +
> + if (!list_empty(&mem_device->res_list))
> + return 0;
> +
> + status = acpi_walk_resources(mem_device->device->handle,
> + METHOD_NAME__CRS, acpi_memory_get_resource, mem_device);
> +
> + if (ACPI_FAILURE(status)) {
> + list_for_each_entry_safe(info, n, &mem_device->res_list, list)
> + kfree(info);
> + INIT_LIST_HEAD(&mem_device->res_list);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int
> +acpi_memory_get_device(acpi_handle handle,
> + struct acpi_memory_device **mem_device)
> +{
> + acpi_status status;
> + acpi_handle phandle;
> + struct acpi_device *device = NULL;
> + struct acpi_device *pdevice = NULL;
> + int result;
> +
> + if (!acpi_bus_get_device(handle, &device) && device)
> + goto end;
> +
> + status = acpi_get_parent(handle, &phandle);
> + if (ACPI_FAILURE(status)) {
> + pr_warn(PREFIX "Cannot find acpi parent\n");
> + return -EINVAL;
> + }
> +
> + /* Get the parent device */
> + result = acpi_bus_get_device(phandle, &pdevice);
> + if (result) {
> + pr_warn(PREFIX "Cannot get acpi bus device\n");
> + return -EINVAL;
> + }
> +
> + /*
> + * Now add the notified device. This creates the acpi_device
> + * and invokes .add function
> + */
> + result = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE);
> + if (result) {
> + pr_warn(PREFIX "Cannot add acpi bus\n");
> + return -EINVAL;
> + }
> +
> +end:
> + *mem_device = acpi_driver_data(device);
> + if (!(*mem_device)) {
> + pr_err(PREFIX "Driver data not found\n");
> + return -ENODEV;
> + }
> +
> + return 0;
> +}
> +
> +static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
> +{
> + unsigned long long current_status;
> +
> + /* Get device present/absent information from the _STA */
> + if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->device->handle,
> + "_STA", NULL, ¤t_status)))
> + return -ENODEV;
> + /*
> + * Check for device status. Device should be
> + * present/enabled/functioning.
> + */
> + if (!((current_status & ACPI_STA_DEVICE_PRESENT)
> + && (current_status & ACPI_STA_DEVICE_ENABLED)
> + && (current_status & ACPI_STA_DEVICE_FUNCTIONING)))
> + return -ENODEV;
> +
> + return 0;
> +}
> +
> +static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
> +{
> + pr_warn(PREFIX "Xen does not support memory hotremove\n");
So is this going to show in the dmesg if the user supplies the '0' in the
SysFS?
Hmm, perhaps that should be pr_debug as the -ENOSYS is enough to tell
the user that we don't support it.
> +
> + return -ENOSYS;
> +}
> +
> +static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
> +{
> + struct acpi_memory_device *mem_device;
> + struct acpi_device *device;
> + u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
> +
> + switch (event) {
> + case ACPI_NOTIFY_BUS_CHECK:
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "\nReceived BUS CHECK notification for device\n"));
> + /* Fall Through */
> + case ACPI_NOTIFY_DEVICE_CHECK:
> + if (event == ACPI_NOTIFY_DEVICE_CHECK)
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "\nReceived DEVICE CHECK notification for device\n"));
> +
> + if (acpi_memory_get_device(handle, &mem_device)) {
> + pr_err(PREFIX "Cannot find driver data\n");
> + break;
> + }
> +
> + ost_code = ACPI_OST_SC_SUCCESS;
> + break;
> +
> + case ACPI_NOTIFY_EJECT_REQUEST:
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "\nReceived EJECT REQUEST notification for device\n"));
> +
> + if (acpi_bus_get_device(handle, &device)) {
> + pr_err(PREFIX "Device doesn't exist\n");
> + break;
> + }
> + mem_device = acpi_driver_data(device);
> + if (!mem_device) {
> + pr_err(PREFIX "Driver Data is NULL\n");
> + break;
> + }
> +
> + /*
> + * TBD: implement acpi_memory_disable_device and invoke
> + * acpi_bus_remove if Xen support hotremove in the future
> + */
> + acpi_memory_disable_device(mem_device);
> + break;
> +
> + default:
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "Unsupported event [0x%x]\n", event));
> + /* non-hotplug event; possibly handled by other handler */
> + return;
> + }
> +
> + (void) acpi_evaluate_hotplug_ost(handle, event, ost_code, NULL);
> + return;
> +}
> +
> +static int xen_acpi_memory_device_add(struct acpi_device *device)
> +{
> + int result;
> + struct acpi_memory_device *mem_device = NULL;
> +
> +
> + if (!device)
> + return -EINVAL;
> +
> + mem_device = kzalloc(sizeof(struct acpi_memory_device), GFP_KERNEL);
> + if (!mem_device)
> + return -ENOMEM;
> +
> + INIT_LIST_HEAD(&mem_device->res_list);
> + mem_device->device = device;
> + sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
> + sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
> + device->driver_data = mem_device;
> +
> + /* Get the range from the _CRS */
> + result = acpi_memory_get_device_resources(mem_device);
> + if (result) {
> + kfree(mem_device);
> + return result;
> + }
> +
> + /*
> + * Early boot code has recognized memory area by EFI/E820.
> + * If DSDT shows these memory devices on boot, hotplug is not necessary
> + * for them. So, it just returns until completion of this driver's
> + * start up.
"So it just returns until completion of this drivers's start up."
Can you change that to be:
"Return OK until this driver starts up."
But then.. how can this function be called with acpi_hotmem_initialized=false?
Is it b/c of the acpi_walk_namespace call? How about you state that:
"This can be done via the acpi_walk_namespace which is called during
early boot and acpi_hotmem_initialized is set _after_ that call
has completed."
> + */
> + if (!acpi_hotmem_initialized)
> + return 0;
> +
> + if (!acpi_memory_check_device(mem_device))
> + result = xen_acpi_memory_enable_device(mem_device);
> +
> + return result;
> +}
> +
> +static int xen_acpi_memory_device_remove(struct acpi_device *device, int type)
> +{
> + struct acpi_memory_device *mem_device = NULL;
> +
> + if (!device || !acpi_driver_data(device))
> + return -EINVAL;
> +
> + mem_device = acpi_driver_data(device);
> + kfree(mem_device);
> +
> + return 0;
> +}
> +
> +/*
> + * Helper function to check for memory device
> + */
> +static acpi_status is_memory_device(acpi_handle handle)
> +{
> + char *hardware_id;
> + acpi_status status;
> + struct acpi_device_info *info;
> +
> + status = acpi_get_object_info(handle, &info);
> + if (ACPI_FAILURE(status))
> + return status;
> +
> + if (!(info->valid & ACPI_VALID_HID)) {
> + kfree(info);
> + return AE_ERROR;
> + }
> +
> + hardware_id = info->hardware_id.string;
> + if ((hardware_id == NULL) ||
> + (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
> + status = AE_ERROR;
> +
> + kfree(info);
> + return status;
> +}
> +
> +static acpi_status
> +acpi_memory_register_notify_handler(acpi_handle handle,
> + u32 level, void *ctxt, void **retv)
> +{
> + acpi_status status;
> +
> + status = is_memory_device(handle);
> + if (ACPI_FAILURE(status))
> + return AE_OK; /* continue */
> +
> + status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
> + acpi_memory_device_notify, NULL);
> + /* continue */
> + return AE_OK;
> +}
> +
> +static acpi_status
> +acpi_memory_deregister_notify_handler(acpi_handle handle,
> + u32 level, void *ctxt, void **retv)
> +{
> + acpi_status status;
> +
> + status = is_memory_device(handle);
> + if (ACPI_FAILURE(status))
> + return AE_OK; /* continue */
> +
> + status = acpi_remove_notify_handler(handle,
> + ACPI_SYSTEM_NOTIFY,
> + acpi_memory_device_notify);
> +
> + return AE_OK; /* continue */
> +}
> +
> +static const struct acpi_device_id memory_device_ids[] = {
> + {ACPI_MEMORY_DEVICE_HID, 0},
> + {"", 0},
> +};
> +MODULE_DEVICE_TABLE(acpi, memory_device_ids);
> +
> +static struct acpi_driver xen_acpi_memory_device_driver = {
> + .name = "acpi_memhotplug",
> + .class = ACPI_MEMORY_DEVICE_CLASS,
> + .ids = memory_device_ids,
> + .ops = {
> + .add = xen_acpi_memory_device_add,
> + .remove = xen_acpi_memory_device_remove,
> + },
> +};
> +
> +static int __init xen_acpi_memory_device_init(void)
> +{
> + int result;
> + acpi_status status;
> +
> + if (!xen_initial_domain())
> + return -ENODEV;
> +
> + /* unregister the stub which only used to reserve driver space */
> + acpi_bus_unregister_driver(&xen_stub_memory_device_driver);
> +
> + result = acpi_bus_register_driver(&xen_acpi_memory_device_driver);
> + if (result < 0)
> + return -ENODEV;
Shouldn't we then try to re-register the stub driver?
> +
> + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
> + ACPI_UINT32_MAX,
> + acpi_memory_register_notify_handler,
> + NULL, NULL, NULL);
> +
> + if (ACPI_FAILURE(status)) {
> + pr_warn(PREFIX "walk_namespace failed\n");
> + acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
Ditto here.
> + return -ENODEV;
> + }
> +
> + acpi_hotmem_initialized = 1;
s/1/true/
> + return 0;
> +}
> +
> +static void __exit xen_acpi_memory_device_exit(void)
> +{
> + acpi_status status;
> +
> + if (!xen_initial_domain())
> + return;
> +
> + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
> + ACPI_UINT32_MAX,
> + acpi_memory_deregister_notify_handler,
> + NULL, NULL, NULL);
> + if (ACPI_FAILURE(status))
> + pr_warn(PREFIX "walk_namespace failed\n");
> +
> + acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
> +
> + /*
> + * stub reserve space again to prevent any chance of native
> + * driver loading, though not much meaning in real life
not much meaning in real life? What does that mean?
> + */
> + acpi_bus_register_driver(&xen_stub_memory_device_driver);
> + return;
> +}
> +
> +module_init(xen_acpi_memory_device_init);
> +module_exit(xen_acpi_memory_device_exit);
> +ACPI_MODULE_NAME("xen-acpi-memhotplug");
> +MODULE_AUTHOR("Liu Jinsong <jinsong.liu@intel.com>");
> +MODULE_DESCRIPTION("Xen Hotplug Mem Driver");
> +MODULE_LICENSE("GPL");
> diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
> index 5e36932..2c4fb4b 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -324,10 +324,14 @@ struct xenpf_cpu_ol {
> };
> DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
>
> -/*
> - * CMD 58 and 59 are reserved for cpu hotadd and memory hotadd,
> - * which are already occupied at Xen hypervisor side.
> - */
> +#define XENPF_mem_hotadd 59
> +struct xenpf_mem_hotadd {
> + uint64_t spfn;
> + uint64_t epfn;
> + uint32_t pxm;
> + uint32_t flags;
> +};
> +
> #define XENPF_core_parking 60
> struct xenpf_core_parking {
> /* IN variables */
> @@ -357,6 +361,7 @@ struct xen_platform_op {
> struct xenpf_set_processor_pminfo set_pminfo;
> struct xenpf_pcpuinfo pcpu_info;
> struct xenpf_cpu_ol cpu_ol;
> + struct xenpf_mem_hotadd mem_add;
> struct xenpf_core_parking core_parking;
> uint8_t pad[128];
> } u;
> --
> 1.7.1
next prev parent reply other threads:[~2013-01-11 18:21 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-09 7:42 [PATCH V2 2/2] Xen ACPI memory hotplug Liu, Jinsong
2013-01-11 18:21 ` Konrad Rzeszutek Wilk [this message]
2013-01-14 6:00 ` Liu, Jinsong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130111182124.GB28685@phenom.dumpdata.com \
--to=konrad.wilk@oracle.com \
--cc=jinsong.liu@intel.com \
--cc=konrad@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.