* [Linux PATCH] Mark hugepages to the hypervisor to speed up mapping (with signoff this time)
@ 2010-06-09 17:20 Dave McCracken
From: Dave McCracken @ 2010-06-09 17:20 UTC
To: Jeremy Fitzhardinge; +Cc: Xen Developers List
Two new hypercalls were recently added to the Xen hypervisor to mark
and unmark hugepages as in use. Once a hugepage has been marked,
actually mapping the hugepage is sped up by a significant amount (more
than 20 times faster).
This patch marks hugepages when they are added to the hugepage pool
and unmarks them when they are removed. It also checks to make sure
each hugepage is contiguous at the machine level.
Signed-off-by: Dave McCracken <dave.mccracken@oracle.com>
--------
--- next/arch/x86/xen/mmu.c 2010-06-01 11:41:54.000000000 -0500
+++ next-fh//arch/x86/xen/mmu.c 2010-06-02 08:45:01.000000000 -0500
@@ -2531,3 +2531,51 @@ static int __init xen_mmu_debugfs(void)
 fs_initcall(xen_mmu_debugfs);
 
 #endif /* CONFIG_XEN_DEBUG_FS */
+
+static int mark_ok = 1;
+
+int arch_prepare_hugepage(struct page *page)
+{
+	struct mmuext_op op;
+	unsigned long pfn, mfn, m;
+	int i;
+	int rc;
+
+	pfn = page_to_pfn(page);
+	mfn = pfn_to_mfn(pfn);
+	if (mfn & ((HPAGE_SIZE/PAGE_SIZE)-1)) {
+		printk("Guest pages are not properly aligned to use hugepages\n");
+		return 1;
+	}
+	for (i = 0, m = mfn; i < HPAGE_SIZE/PAGE_SIZE; i++, pfn++, m++) {
+		if (pfn_to_mfn(pfn) != m) {
+			printk("Guest pages are not properly aligned to use hugepages\n");
+			return 1;
+		}
+	}
+	/* It's ok if this fails. We just fall back to the slow refcounting */
+	if (mark_ok) {
+		op.cmd = MMUEXT_MARK_SUPER;
+		op.arg1.mfn = mfn;
+		rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+		if (rc) {
+			if (rc == -ENOSYS) {
+				mark_ok = 0;
+				return 0;
+			}
+			return 1;
+		}
+	}
+	return 0;
+}
+
+void arch_release_hugepage(struct page *page)
+{
+	struct mmuext_op op;
+
+	if (mark_ok) {
+		op.cmd = MMUEXT_UNMARK_SUPER;
+		op.arg1.mfn = pfn_to_mfn(page_to_pfn(page));
+		HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	}
+}
--- next/arch/x86/include/asm/hugetlb.h 2010-06-01 11:41:54.000000000 -0500
+++ next-fh//arch/x86/include/asm/hugetlb.h 2010-06-02 08:37:58.000000000 -0500
@@ -95,6 +95,10 @@ static inline int huge_ptep_set_access_f
 	return changed;
 }
 
+#ifdef CONFIG_XEN
+int arch_prepare_hugepage(struct page *page);
+void arch_release_hugepage(struct page *page);
+#else
 static inline int arch_prepare_hugepage(struct page *page)
 {
 	return 0;
@@ -103,5 +107,6 @@ static inline int arch_prepare_hugepage(
 static inline void arch_release_hugepage(struct page *page)
 {
 }
+#endif
 
 #endif /* _ASM_X86_HUGETLB_H */
--- next/include/xen/interface/xen.h 2010-06-01 11:41:59.000000000 -0500
+++ next-fh//include/xen/interface/xen.h 2010-06-02 08:38:50.000000000 -0500
@@ -167,6 +167,19 @@
  * cmd: MMUEXT_SET_LDT
  * linear_addr: Linear address of LDT base (NB. must be page-aligned).
  * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be marked.
+ *
+ * cmd: MMUEXT_UNMARK_SUPER
+ * mfn: Machine frame number of head of superpage to be cleared.
  */
 #define MMUEXT_PIN_L1_TABLE 0
 #define MMUEXT_PIN_L2_TABLE 1
@@ -183,6 +196,10 @@
 #define MMUEXT_FLUSH_CACHE 12
 #define MMUEXT_SET_LDT 13
 #define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE 16
+#define MMUEXT_COPY_PAGE 17
+#define MMUEXT_MARK_SUPER 19
+#define MMUEXT_UNMARK_SUPER 20
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
* Re: [Linux PATCH] Mark hugepages to the hypervisor to speed up mapping (with signoff this time)
@ 2010-06-09 18:04 Jeremy Fitzhardinge
From: Jeremy Fitzhardinge @ 2010-06-09 18:04 UTC
To: Dave McCracken; +Cc: Xen Developers List
On 06/09/2010 10:20 AM, Dave McCracken wrote:
> Two new hypercalls were recently added to the Xen hypervisor to mark
> and unmark hugepages as in use. Once a hugepage has been marked,
> actually mapping the hugepage is sped up by a significant amount (more
> than 20 times faster).
>
> This patch marks hugepages when they are added to the hugepage pool
> and unmarks them when they are removed. It also checks to make sure
> each hugepage is contiguous at the machine level.
>
> Signed-off-by: Dave McCracken <dave.mccracken@oracle.com>
>
> --------
>
>
> --- next/arch/x86/xen/mmu.c 2010-06-01 11:41:54.000000000 -0500
> +++ next-fh//arch/x86/xen/mmu.c 2010-06-02 08:45:01.000000000 -0500
> @@ -2531,3 +2531,51 @@ static int __init xen_mmu_debugfs(void)
> fs_initcall(xen_mmu_debugfs);
>
> #endif /* CONFIG_XEN_DEBUG_FS */
> +
> +static int mark_ok = 1;
>
__read_mostly
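(Presumably just

	static int mark_ok __read_mostly = 1;

since the flag is only ever written once, when the hypercall returns -ENOSYS.)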
> +
> +int arch_prepare_hugepage(struct page *page)
> +{
> + struct mmuext_op op;
> + unsigned long pfn, mfn, m;
> + int i;
> + int rc;
> +
> + pfn = page_to_pfn(page);
> + mfn = pfn_to_mfn(pfn);
> + if (mfn & ((HPAGE_SIZE/PAGE_SIZE)-1)) {
> + printk("Guest pages are not properly aligned to use hugepages\n");
>
Don't use printk to do this. Use WARN() to print a stack trace so you
can work out who's doing this.
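Something along these lines, perhaps (untested):

	if (WARN(mfn & ((HPAGE_SIZE/PAGE_SIZE)-1),
		 "Guest pages are not properly aligned to use hugepages\n"))
		return 1;

WARN() returns the value of its condition, so the test and the message fold
into one statement.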
> + return 1;
> + }
> + for (i = 0, m = mfn; i < HPAGE_SIZE/PAGE_SIZE; i++, pfn++, m++) {
> + if (pfn_to_mfn(pfn) != m) {
> + printk("Guest pages are not properly aligned to use hugepages\n");
>
Isn't this a superset of the previous test?
> + return 1;
> + }
> + }
> + /* It's ok if this fails. We just fall back to the slow refcounting */
> + if (mark_ok) {
> + op.cmd = MMUEXT_MARK_SUPER;
> + op.arg1.mfn = mfn;
> + rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
> + if (rc) {
> + if (rc == -ENOSYS) {
> + mark_ok = 0;
> + return 0;
> + }
> + return 1;
> + }
> + }
> + return 0;
> +}
> +
> +void arch_release_hugepage(struct page *page)
> +{
> + struct mmuext_op op;
> +
> + if (mark_ok) {
> + op.cmd = MMUEXT_UNMARK_SUPER;
> + op.arg1.mfn = pfn_to_mfn(page_to_pfn(page));
> + HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
> + }
> +}
> --- next/arch/x86/include/asm/hugetlb.h 2010-06-01 11:41:54.000000000 -0500
> +++ next-fh//arch/x86/include/asm/hugetlb.h 2010-06-02 08:37:58.000000000 -0500
> @@ -95,6 +95,10 @@ static inline int huge_ptep_set_access_f
> return changed;
> }
>
> +#ifdef CONFIG_XEN
> +int arch_prepare_hugepage(struct page *page);
> +void arch_release_hugepage(struct page *page);
> +#else
> static inline int arch_prepare_hugepage(struct page *page)
> {
> return 0;
> @@ -103,5 +107,6 @@ static inline int arch_prepare_hugepage(
> static inline void arch_release_hugepage(struct page *page)
> {
> }
> +#endif
>
That won't work. What if you boot a CONFIG_XEN kernel native?
Something like:
#ifdef CONFIG_XEN
int xen_prepare_hugepage(struct page *page);
#else
static inline int xen_prepare_hugepage(struct page *page)
{
	return 0;
}
#endif

static inline int arch_prepare_hugepage(struct page *page)
{
	if (xen_pv_domain())
		return xen_prepare_hugepage(page);
	return 0;
}
might work better.
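Presumably with a matching stub for the release side, something like (untested,
name invented to mirror the above):

#ifdef CONFIG_XEN
void xen_release_hugepage(struct page *page);
#else
static inline void xen_release_hugepage(struct page *page)
{
}
#endif

static inline void arch_release_hugepage(struct page *page)
{
	if (xen_pv_domain())
		xen_release_hugepage(page);
}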
J
>
> #endif /* _ASM_X86_HUGETLB_H */
> --- next/include/xen/interface/xen.h 2010-06-01 11:41:59.000000000 -0500
> +++ next-fh//include/xen/interface/xen.h 2010-06-02 08:38:50.000000000 -0500
> @@ -167,6 +167,19 @@
> * cmd: MMUEXT_SET_LDT
> * linear_addr: Linear address of LDT base (NB. must be page-aligned).
> * nr_ents: Number of entries in LDT.
> + *
> + * cmd: MMUEXT_CLEAR_PAGE
> + * mfn: Machine frame number to be cleared.
> + *
> + * cmd: MMUEXT_COPY_PAGE
> + * mfn: Machine frame number of the destination page.
> + * src_mfn: Machine frame number of the source page.
> + *
> + * cmd: MMUEXT_MARK_SUPER
> + * mfn: Machine frame number of head of superpage to be marked.
> + *
> + * cmd: MMUEXT_UNMARK_SUPER
> + * mfn: Machine frame number of head of superpage to be cleared.
> */
> #define MMUEXT_PIN_L1_TABLE 0
> #define MMUEXT_PIN_L2_TABLE 1
> @@ -183,6 +196,10 @@
> #define MMUEXT_FLUSH_CACHE 12
> #define MMUEXT_SET_LDT 13
> #define MMUEXT_NEW_USER_BASEPTR 15
> +#define MMUEXT_CLEAR_PAGE 16
> +#define MMUEXT_COPY_PAGE 17
> +#define MMUEXT_MARK_SUPER 19
> +#define MMUEXT_UNMARK_SUPER 20
>
> #ifndef __ASSEMBLY__
> #include <linux/types.h>
>
>
* Re: [Linux PATCH] Mark hugepages to the hypervisor to speed up mapping (with signoff this time)
@ 2010-06-09 18:20 Dave McCracken
From: Dave McCracken @ 2010-06-09 18:20 UTC
To: Jeremy Fitzhardinge; +Cc: Xen Developers List
On Wednesday, June 09, 2010, Jeremy Fitzhardinge wrote:
> > + return 1;
> > + }
> > + for (i = 0, m = mfn; i < HPAGE_SIZE/PAGE_SIZE; i++, pfn++, m++) {
> > + if (pfn_to_mfn(pfn) != m) {
> > + printk("Guest pages are not properly aligned to use hugepages\n");
> >
>
> Isn't this a superset of the previous test?
The code is double-checking that this hugepage is actually contiguous at the
machine level. The first test only looks at the alignment of the head mfn; the
loop then verifies that every pfn in the range maps to the next consecutive
mfn, which a PV guest cannot take for granted.
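For example (frame numbers made up):

	pfn:  0x1200  0x1201  0x1202  ...
	mfn:  0x4800  0x4801  0x79f3  ...

The head mfn is properly aligned, so the first test passes, but the third frame
breaks machine contiguity. Only the per-frame loop catches that case.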
Dave McCracken
Oracle Corp.