linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH R4 6/7] mm: Extend memory hotplug API to allow memory hotplug in virtual guests
@ 2011-03-08 21:50 Daniel Kiper
  2011-03-08 23:51 ` Dave Hansen
  0 siblings, 1 reply; 3+ messages in thread
From: Daniel Kiper @ 2011-03-08 21:50 UTC (permalink / raw)
  To: ian.campbell, akpm, andi.kleen, haicheng.li, fengguang.wu, jeremy,
	konrad.wilk, dan.magenheimer, v.tolstov, pasik, dave, wdauchy,
	rientjes, xen-devel, linux-kernel, linux-mm

This patch extends memory hotplug API to allow easy memory hotplug
in virtual guests. It contains:
  - generic section alignment macro,
  - online_page_chain and appropriate functions for registering/unregistering
    online page notifiers,
  - add_virtual_memory(u64 *size) function which adds memory region
    of size >= *size above max_pfn; new region is section aligned
    and size is modified to be multiple of section size.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
---
 include/linux/memory_hotplug.h |    6 ++-
 include/linux/mmzone.h         |    2 +
 mm/memory_hotplug.c            |   91 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8122018..4cfc5a0 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -68,12 +68,13 @@ static inline void zone_seqlock_init(struct zone *zone)
 extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
 extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
-/* need some defines for these for archs that don't support it */
-extern void online_page(struct page *page);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
+extern int register_online_page_notifier(struct notifier_block *nb);
+extern int unregister_online_page_notifier(struct notifier_block *nb);
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
@@ -224,6 +225,7 @@ static inline int is_mem_section_removable(unsigned long pfn,
 
 extern int mem_online_node(int nid);
 extern int add_memory(int nid, u64 start, u64 size);
+extern int add_virtual_memory(u64 *size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int remove_memory(u64 start, u64 size);
 extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 02ecb01..76a7cbd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -931,6 +931,8 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
 #define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
 
+#define SECTION_ALIGN(pfn)	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
+
 #ifdef CONFIG_SPARSEMEM
 
 /*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 321fc74..3b38d89 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -29,11 +29,25 @@
 #include <linux/suspend.h>
 #include <linux/mm_inline.h>
 #include <linux/firmware-map.h>
+#include <linux/notifier.h>
 
 #include <asm/tlbflush.h>
 
 #include "internal.h"
 
+/*
+ * online_page_chain contains chain of notifiers called when page is onlined.
+ * When kernel is booting native_online_page_notifier() is registered with
+ * priority 0 as default notifier. Custom notifier should be registered with
+ * priority > 0. It could be terminal (it should return NOTIFY_STOP on success)
+ * or not (it should return NOTIFY_DONE or NOTIFY_OK on success; for full list
+ * of return codes look into include/linux/notifier.h).
+ *
+ * Working example of usage: drivers/xen/balloon.c
+ */
+
+static RAW_NOTIFIER_HEAD(online_page_chain);
+
 DEFINE_MUTEX(mem_hotplug_mutex);
 
 void lock_memory_hotplug(void)
@@ -361,8 +375,33 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 }
 EXPORT_SYMBOL_GPL(__remove_pages);
 
-void online_page(struct page *page)
+int register_online_page_notifier(struct notifier_block *nb)
+{
+	int rc;
+
+	lock_memory_hotplug();
+	rc = raw_notifier_chain_register(&online_page_chain, nb);
+	unlock_memory_hotplug();
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(register_online_page_notifier);
+
+int unregister_online_page_notifier(struct notifier_block *nb)
+{
+	int rc;
+
+	lock_memory_hotplug();
+	rc = raw_notifier_chain_unregister(&online_page_chain, nb);
+	unlock_memory_hotplug();
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(unregister_online_page_notifier);
+
+static int native_online_page_notifier(struct notifier_block *nb, unsigned long val, void *v)
 {
+	struct page *page = v;
 	unsigned long pfn = page_to_pfn(page);
 
 	totalram_pages++;
@@ -375,12 +414,30 @@ void online_page(struct page *page)
 #endif
 
 #ifdef CONFIG_FLATMEM
-	max_mapnr = max(page_to_pfn(page), max_mapnr);
+	max_mapnr = max(pfn, max_mapnr);
 #endif
 
 	ClearPageReserved(page);
 	init_page_count(page);
 	__free_page(page);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block native_online_page_nb = {
+	.notifier_call = native_online_page_notifier,
+	.priority = 0
+};
+
+static int __init init_online_page_chain(void)
+{
+	return register_online_page_notifier(&native_online_page_nb);
+}
+pure_initcall(init_online_page_chain);
+
+static void online_page(struct page *page)
+{
+	raw_notifier_call_chain(&online_page_chain, 0, page);
 }
 
 static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
@@ -591,6 +648,36 @@ out:
 }
 EXPORT_SYMBOL_GPL(add_memory);
 
+/*
+ * add_virtual_memory() adds memory region of size >= *size above max_pfn.
+ * New region is section aligned and size is modified to be multiple of
+ * section size. Those features allow optimal use of address space and
+ * establish proper alignment when this function is called first time after
+ * boot (last section not fully populated at boot time may contain unused
+ * memory pages with PG_reserved bit not set; online_pages() does not allow
+ * page onlining in whole section if first page does not have PG_reserved
+ * bit set). Real size of added memory should be established at page onlining
+ * stage.
+ *
+ * This function is often used in virtual guests because mainly they do not
+ * care about new memory region address.
+ *
+ * Working example of usage: drivers/xen/balloon.c
+ */
+
+int add_virtual_memory(u64 *size)
+{
+	int nid;
+	u64 start;
+
+	start = PFN_PHYS(SECTION_ALIGN(max_pfn));
+	*size = (((*size >> PAGE_SHIFT) & PAGE_SECTION_MASK) + PAGES_PER_SECTION) << PAGE_SHIFT;
+	nid = memory_add_physaddr_to_nid(start);
+
+	return add_memory(nid, start, *size);
+}
+EXPORT_SYMBOL_GPL(add_virtual_memory);
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /*
  * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
-- 
1.5.6.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH R4 6/7] mm: Extend memory hotplug API to allow memory hotplug in virtual guests
  2011-03-08 21:50 [PATCH R4 6/7] mm: Extend memory hotplug API to allow memory hotplug in virtual guests Daniel Kiper
@ 2011-03-08 23:51 ` Dave Hansen
  2011-03-10  8:51   ` Daniel Kiper
  0 siblings, 1 reply; 3+ messages in thread
From: Dave Hansen @ 2011-03-08 23:51 UTC (permalink / raw)
  To: Daniel Kiper
  Cc: ian.campbell, akpm, andi.kleen, haicheng.li, fengguang.wu, jeremy,
	konrad.wilk, dan.magenheimer, v.tolstov, pasik, wdauchy, rientjes,
	xen-devel, linux-kernel, linux-mm

On Tue, 2011-03-08 at 22:50 +0100, Daniel Kiper wrote:
> This patch extends memory hotplug API to allow easy memory hotplug
> in virtual guests. It contains:
>   - generic section aligment macro,
>   - online_page_chain and apropriate functions for registering/unregistering
>     online page notifiers,
>   - add_virtual_memory(u64 *size) function which adds memory region
>     of size >= *size above max_pfn; new region is section aligned
>     and size is modified to be multiple of section size.

Usually, when you can list stuff out like this, it's a good sign that
they belong in separate patches.  I think it's true here as well.

But, these are looking a lot better.  It looks like much less code, and
it's quite a bit simpler.

> +/*
> + * online_page_chain contains chain of notifiers called when page is onlined.
> + * When kernel is booting native_online_page_notifier() is registered with
> + * priority 0 as default notifier. Custom notifier should be registered with
> + * pririty > 0. It could be terminal (it should return NOTIFY_STOP on success)

"pririty"?

> + * or not (it should return NOTIFY_DONE or NOTIFY_OK on success; for full list
> + * of return codes look into include/linux/notifier.h).
> + *
> + * Working example of usage: drivers/xen/balloon.c
> + */
> +
> +static RAW_NOTIFIER_HEAD(online_page_chain);
> +
>  DEFINE_MUTEX(mem_hotplug_mutex);
> 
>  void lock_memory_hotplug(void)
> @@ -361,8 +375,33 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
>  }
>  EXPORT_SYMBOL_GPL(__remove_pages);
> 
> -void online_page(struct page *page)
> +int register_online_page_notifier(struct notifier_block *nb)
> +{
> +	int rc;
> +
> +	lock_memory_hotplug();
> +	rc = raw_notifier_chain_register(&online_page_chain, nb);
> +	unlock_memory_hotplug();
> +
> +	return rc;
> +}
> +EXPORT_SYMBOL_GPL(register_online_page_notifier);
> +
> +int unregister_online_page_notifier(struct notifier_block *nb)
> +{
> +	int rc;
> +
> +	lock_memory_hotplug();
> +	rc = raw_notifier_chain_unregister(&online_page_chain, nb);
> +	unlock_memory_hotplug();
> +
> +	return rc;
> +}
> +EXPORT_SYMBOL_GPL(unregister_online_page_notifier);

The whole "native" thing really is Xen terminology.  Could we call this
"generic_online_page_notifier()" perhaps?  This really isn't even
"native" either since some hypervisors actually do use this code.

> +static int native_online_page_notifier(struct notifier_block *nb, unsigned long val, void *v)
>  {
> +	struct page *page = v;
>  	unsigned long pfn = page_to_pfn(page);
> 
>  	totalram_pages++;
> @@ -375,12 +414,30 @@ void online_page(struct page *page)
>  #endif
> 
>  #ifdef CONFIG_FLATMEM
> -	max_mapnr = max(page_to_pfn(page), max_mapnr);
> +	max_mapnr = max(pfn, max_mapnr);
>  #endif

This is another tidbit that's probably good to do, but it's superfluous
here.  

>  	ClearPageReserved(page);
>  	init_page_count(page);
>  	__free_page(page);
> +
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block native_online_page_nb = {
> +	.notifier_call = native_online_page_notifier,
> +	.priority = 0
> +};

That comment about priority really belongs here.  

 /*
  * 0 priority makes this the fallthrough default.  All
  * architectures wanting to override this should set a
  * higher priority and return NOTIFY_STOP to keep this
  * from running.
  */

> +static int __init init_online_page_chain(void)
> +{
> +	return register_online_page_notifier(&native_online_page_nb);
> +}
> +pure_initcall(init_online_page_chain);
> +
> +static void online_page(struct page *page)
> +{
> +	raw_notifier_call_chain(&online_page_chain, 0, page);
>  }
> 
>  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
> @@ -591,6 +648,36 @@ out:
>  }
>  EXPORT_SYMBOL_GPL(add_memory);
> 
> +/*
> + * add_virtual_memory() adds memory region of size >= *size above max_pfn.
> + * New region is section aligned and size is modified to be multiple of
> + * section size.

Aligned up or down?  Why did you choose up or down?

> Those features allow optimal use of address space and
> + * establish proper aligment when this function is called first time after

                      ^^^^^^^^ alignment?

> + * boot (last section not fully populated at boot time may contains unused
> + * memory pages with PG_reserved bit not set; online_pages() does not allow
> + * page onlining in whole section if first page does not have PG_reserved
> + * bit set). Real size of added memory should be established at page onlining
> + * stage.
> + *
> + * This function is often used in virtual guests because mainly they do not
> + * care about new memory region address.

Remember, you're touching generic memory hotplug code here.  I really
don't know what a "virtual guest" is or how it relates to this code.
How about something like this?

        This code is expected to be used in cases where a certain amount
        of memory needs to get added, but when the hardware or
        hypervisor does not dictate where it will be placed.

> + * Working example of usage: drivers/xen/balloon.c

Please pull this out.  It'll probably become stale before anyone uses
it.  I trust people to know how to use cscope. :)

> +int add_virtual_memory(u64 *size)
> +{
> +	int nid;
> +	u64 start;
> +
> +	start = PFN_PHYS(SECTION_ALIGN(max_pfn));
> +	*size = (((*size >> PAGE_SHIFT) & PAGE_SECTION_MASK) + PAGES_PER_SECTION) << PAGE_SHIFT;

Why use PFN_PHYS() in one case but not the other?

I'd also highly suggest using the ALIGN() macro in cases like this.  It
makes it much more readable:

	*size = PFN_PHYS(ALIGN(*size, SECTION_SIZE));

> +	nid = memory_add_physaddr_to_nid(start);
> +
> +	return add_memory(nid, start, *size);
> +}

Could you talk a little bit more about how 'size' gets used?  Also, are
we sure we want an interface where we're so liberal with 'size'?  It
seems like requiring that it be section-aligned is a fair burden to
place on the caller.  That way, we're not in a position of _guessing_
what the caller wants (aligning up or down).

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH R4 6/7] mm: Extend memory hotplug API to allow memory hotplug in virtual guests
  2011-03-08 23:51 ` Dave Hansen
@ 2011-03-10  8:51   ` Daniel Kiper
  0 siblings, 0 replies; 3+ messages in thread
From: Daniel Kiper @ 2011-03-10  8:51 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Daniel Kiper, ian.campbell, akpm, andi.kleen, haicheng.li,
	fengguang.wu, jeremy, konrad.wilk, dan.magenheimer, v.tolstov,
	pasik, wdauchy, rientjes, xen-devel, linux-kernel, linux-mm

On Tue, Mar 08, 2011 at 03:51:12PM -0800, Dave Hansen wrote:
> On Tue, 2011-03-08 at 22:50 +0100, Daniel Kiper wrote:
> > +int add_virtual_memory(u64 *size)
> > +{
> > +	int nid;
> > +	u64 start;
> > +
> > +	start = PFN_PHYS(SECTION_ALIGN(max_pfn));
> > +	*size = (((*size >> PAGE_SHIFT) & PAGE_SECTION_MASK) + PAGES_PER_SECTION) << PAGE_SHIFT;
>
> Why use PFN_PHYS() in one case but not the other?

I know that the result is the same; however, I think that using PFN_PHYS()
suggests that I am doing a PFN/address manipulation. That is not true in this
case (I am operating on a region size), and I would like to avoid that ambiguity.

> I'd also highly suggest using the ALIGN() macro in cases like this.  It
> makes it much more readable:

OK.

> 	*size = PFN_PHYS(ALIGN(*size, SECTION_SIZE)));
>
> > +	nid = memory_add_physaddr_to_nid(start);
> > +
> > +	return add_memory(nid, start, *size);
> > +}
>
> Could you talk a little bit more about how 'size' gets used?  Also, are
> we sure we want an interface where we're so liberal with 'size'?  It
> seems like requiring that it be section-aligned is a fair burden to
> place on the caller.  That way, we're not in a position of _guessing_
> what the caller wants (aligning up or down).

I have not liked this function since I created it. However,
I decided to send it for review. It does not simplify anything
(add_memory() as a generic function is sufficient) and it is
too inflexible. Now I am sure that everything in its body
should be moved to the platform-specific module (in this case Xen).
I am going to do that in the next patch release.

Daniel

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-03-10  8:52 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-08 21:50 [PATCH R4 6/7] mm: Extend memory hotplug API to allow memory hotplug in virtual guests Daniel Kiper
2011-03-08 23:51 ` Dave Hansen
2011-03-10  8:51   ` Daniel Kiper

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).