All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Add 2M page support to Xen balloon driver
@ 2009-06-16 15:53 Dave McCracken
  2009-06-17  8:42 ` Jan Beulich
  0 siblings, 1 reply; 3+ messages in thread
From: Dave McCracken @ 2009-06-16 15:53 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Xen Developers List

[-- Attachment #1: Type: text/plain, Size: 625 bytes --]


This patch adds a kernel command line option "balloon_hugepages" that, when
enabled, makes the balloon driver work in 2M pages (i.e. hugepages).  This
works in conjunction with the "superpages" domain creation option, so once
a domain is created with 2M contiguous pages it will continue to free and
reallocate at the 2M page size.

Note that the current hypervisor code does not allow 2M page allocations for 
all guest domains.  Keir has agreed to change the hypervisor to allow them, 
but for now "balloon_hugepages" should only be specified on hypervisors that 
have this change.

Dave McCracken
Oracle Corp.


[-- Attachment #2: xen-balloon-2.6.30-1.patch --]
[-- Type: text/plain, Size: 7787 bytes --]

--- 2.6.30/./drivers/xen/balloon.c	2009-06-09 22:05:27.000000000 -0500
+++ 2.6.30-domu/./drivers/xen/balloon.c	2009-06-16 10:23:25.000000000 -0500
@@ -58,7 +58,7 @@
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
@@ -93,6 +93,13 @@ static DEFINE_SPINLOCK(balloon_lock);
 
 static struct balloon_stats balloon_stats;
 
+/*
+ * Work in pages of this order.  Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+static int balloon_order;
+static unsigned long balloon_npages;
+
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
@@ -197,12 +204,11 @@ static unsigned long current_target(void
 
 static int increase_reservation(unsigned long nr_pages)
 {
-	unsigned long  pfn, i, flags;
+	unsigned long  pfn, mfn, i, j, flags;
 	struct page   *page;
 	long           rc;
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
-		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 
@@ -214,12 +220,14 @@ static int increase_reservation(unsigned
 	page = balloon_first_page();
 	for (i = 0; i < nr_pages; i++) {
 		BUG_ON(page == NULL);
-		frame_list[i] = page_to_pfn(page);;
+		frame_list[i] = page_to_pfn(page);
 		page = balloon_next_page(page);
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents = nr_pages;
+	reservation.extent_order = balloon_order;
+
 	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
 	if (rc < nr_pages) {
 		if (rc > 0) {
@@ -242,19 +250,22 @@ static int increase_reservation(unsigned
 		BUG_ON(page == NULL);
 
 		pfn = page_to_pfn(page);
+		mfn = frame_list[i];
 		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
 		       phys_to_machine_mapping_valid(pfn));
 
-		set_phys_to_machine(pfn, frame_list[i]);
+		for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+			set_phys_to_machine(pfn, mfn);
 
-		/* Link back into the page tables if not highmem. */
-		if (pfn < max_low_pfn) {
-			int ret;
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)__va(pfn << PAGE_SHIFT),
-				mfn_pte(frame_list[i], PAGE_KERNEL),
-				0);
-			BUG_ON(ret);
+			/* Link back into the page tables if not highmem. */
+			if (pfn < max_low_pfn) {
+				int ret;
+				ret = HYPERVISOR_update_va_mapping(
+					(unsigned long)__va(pfn << PAGE_SHIFT),
+					mfn_pte(mfn, PAGE_KERNEL),
+					0);
+				BUG_ON(ret);
+			}
 		}
 
 		/* Relinquish the page back to the allocator. */
@@ -264,7 +275,7 @@ static int increase_reservation(unsigned
 	}
 
 	balloon_stats.current_pages += nr_pages;
-	totalram_pages = balloon_stats.current_pages;
+	totalram_pages = balloon_stats.current_pages << balloon_order;
 
  out:
 	spin_unlock_irqrestore(&balloon_lock, flags);
@@ -274,13 +285,12 @@ static int increase_reservation(unsigned
 
 static int decrease_reservation(unsigned long nr_pages)
 {
-	unsigned long  pfn, i, flags;
+	unsigned long  pfn, i, j, flags;
 	struct page   *page;
 	int            need_sleep = 0;
 	int ret;
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
-		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 
@@ -288,7 +298,7 @@ static int decrease_reservation(unsigned
 		nr_pages = ARRAY_SIZE(frame_list);
 
 	for (i = 0; i < nr_pages; i++) {
-		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+		if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) {
 			nr_pages = i;
 			need_sleep = 1;
 			break;
@@ -298,14 +308,6 @@ static int decrease_reservation(unsigned
 		frame_list[i] = pfn_to_mfn(pfn);
 
 		scrub_page(page);
-
-		if (!PageHighMem(page)) {
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)__va(pfn << PAGE_SHIFT),
-				__pte_ma(0), 0);
-			BUG_ON(ret);
-                }
-
 	}
 
 	/* Ensure that ballooned highmem pages don't have kmaps. */
@@ -317,17 +319,26 @@ static int decrease_reservation(unsigned
 	/* No more mappings: invalidate P2M and add to balloon. */
 	for (i = 0; i < nr_pages; i++) {
 		pfn = mfn_to_pfn(frame_list[i]);
-		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 		balloon_append(pfn_to_page(pfn));
+		for (j = 0; j < balloon_npages; j++, pfn++) {
+			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+			if (!PageHighMem(page)) {
+				ret = HYPERVISOR_update_va_mapping(
+					(unsigned long)__va(pfn << PAGE_SHIFT),
+					__pte_ma(0), 0);
+				BUG_ON(ret);
+			}
+		}
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents   = nr_pages;
+	reservation.extent_order = balloon_order;
 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 	BUG_ON(ret != nr_pages);
 
 	balloon_stats.current_pages -= nr_pages;
-	totalram_pages = balloon_stats.current_pages;
+	totalram_pages = balloon_stats.current_pages << balloon_order;
 
 	spin_unlock_irqrestore(&balloon_lock, flags);
 
@@ -397,7 +408,7 @@ static void watch_target(struct xenbus_w
 	/* The given memory/target value is in KiB, so it needs converting to
 	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
 	 */
-	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10 + balloon_order));
 }
 
 static int balloon_init_watcher(struct notifier_block *notifier,
@@ -423,10 +434,13 @@ static int __init balloon_init(void)
 	if (!xen_pv_domain())
 		return -ENODEV;
 
-	pr_info("xen_balloon: Initialising balloon driver.\n");
+	pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+		balloon_order);
+
+	balloon_npages = 1 << balloon_order;
 
-	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
-	totalram_pages   = balloon_stats.current_pages;
+	balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
+	totalram_pages   = balloon_stats.current_pages << balloon_order;
 	balloon_stats.target_pages  = balloon_stats.current_pages;
 	balloon_stats.balloon_low   = 0;
 	balloon_stats.balloon_high  = 0;
@@ -440,10 +454,12 @@ static int __init balloon_init(void)
 	register_balloon(&balloon_sysdev);
 
 	/* Initialise the balloon with excess memory space. */
-	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
-		page = pfn_to_page(pfn);
-		if (!PageReserved(page))
-			balloon_append(page);
+	if (!balloon_order) {
+		for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+			page = pfn_to_page(pfn);
+			if (!PageReserved(page))
+				balloon_append(page);
+		}
 	}
 
 	target_watch.callback = watch_target;
@@ -464,6 +480,14 @@ static void balloon_exit(void)
 
 module_exit(balloon_exit);
 
+static int __init balloon_parse_huge(char *s)
+{
+	balloon_order = 9;
+	return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
 #define BALLOON_SHOW(name, format, args...)				\
 	static ssize_t show_##name(struct sys_device *dev,		\
 				   struct sysdev_attribute *attr,	\
@@ -500,7 +524,7 @@ static ssize_t store_target_kb(struct sy
 
 	target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 
-	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+	balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
 	return count;
 }
@@ -514,7 +538,7 @@ static ssize_t show_target(struct sys_de
 {
 	return sprintf(buf, "%llu\n",
 		       (unsigned long long)balloon_stats.target_pages
-		       << PAGE_SHIFT);
+		       << (PAGE_SHIFT + balloon_order));
 }
 
 static ssize_t store_target(struct sys_device *dev,
@@ -530,7 +554,7 @@ static ssize_t store_target(struct sys_d
 
 	target_bytes = memparse(buf, &endchar);
 
-	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+	balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
 
 	return count;
 }

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Add 2M page support to Xen balloon driver
  2009-06-16 15:53 [PATCH] Add 2M page support to Xen balloon driver Dave McCracken
@ 2009-06-17  8:42 ` Jan Beulich
  2009-06-17 16:40   ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 3+ messages in thread
From: Jan Beulich @ 2009-06-17  8:42 UTC (permalink / raw)
  To: Dave McCracken; +Cc: Jeremy Fitzhardinge, Xen Developers List

>>> Dave McCracken <dcm@mccr.org> 16.06.09 17:53 >>>
>
>This patch adds a kernel command line option "balloon_hugepages" that, when 
>enabled, will make the balloon driver work in 2M pages (ie hugepages).  This 
>will work in conjunction with the "superpages" domain creation option so once 
>a domain is created with 2M contiguous pages it will continue to free and re-
>allocate at the 2M page size.
>
>Note that the current hypervisor code does not allow 2M page allocations for 
>all guest domains.  Keir has agreed to change the hypervisor to allow them, 
>but for now "balloon_hugepages" should only be specified on hypervisors that 
>have this change.

How would that work with future code that alters the p2m map outside of the
balloon driver (currently only a piece of dead code in xen-netfront.c does
so)? Shouldn't you at least verify that the large page you allocated is indeed
machine-contiguous?

Also, after reasonably long uptime and on a reasonably loaded machine -
how good are the chances you would be able to allocate a large page
through alloc_pages() in the first place?

Jan

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Add 2M page support to Xen balloon  driver
  2009-06-17  8:42 ` Jan Beulich
@ 2009-06-17 16:40   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 3+ messages in thread
From: Jeremy Fitzhardinge @ 2009-06-17 16:40 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Dave McCracken, Xen Developers List

On 06/17/09 01:42, Jan Beulich wrote:
>> This patch adds a kernel command line option "balloon_hugepages" that, when
>> enabled, will make the balloon driver work in 2M pages (ie hugepages).  This
>> will work in conjunction with the "superpages" domain creation option so once
>> a domain is created with 2M contiguous pages it will continue to free and re-
>> allocate at the 2M page size.
>>
>> Note that the current hypervisor code does not allow 2M page allocations for
>> all guest domains.  Keir has agreed to change the hypervisor to allow them,
>> but for now "balloon_hugepages" should only be specified on hypervisors that
>> have this change.
>>      
>
> How would that work with future (currently only some piece of dead code in
> xen-netfront.c does so) code altering the p2m map outside of the balloon
> driver? Shouldn't you at least verify the large page you allocated is indeed
> machine-contiguous?
>    

I have some experimental patches to move memory around at boot time to 
avoid e820 holes.  It would need to take care with 2M pages.  The dma 
code also updates the p2m map when it makes a page range contiguous.

> Also, after reasonably long uptime and on a reasonably loaded machine -
> how good are the chances you would be able to allocate a large page
> through alloc_pages() in the first place?
>    

I think you lose the ability to allocate 2M pages pretty quickly;
probably within only a few minutes on a moderately loaded server (depending
on total memory size, of course).  On the other hand, the VM can now relocate
user and pagecache pages to try to satisfy large memory allocations, so
maybe it can manage it for longer or even indefinitely.

     J

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-06-17 16:40 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-16 15:53 [PATCH] Add 2M page support to Xen balloon driver Dave McCracken
2009-06-17  8:42 ` Jan Beulich
2009-06-17 16:40   ` Jeremy Fitzhardinge

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.