All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Dave McCracken <dcm@mccr.org>
Cc: Xen Developers List <xen-devel@lists.xensource.com>
Subject: Re: [Linux PATCH] Mark hugepages to the hypervisor to speed up mapping (with signoff this time)
Date: Wed, 09 Jun 2010 11:04:00 -0700	[thread overview]
Message-ID: <4C0FD790.3070603@goop.org> (raw)
In-Reply-To: <20100609172016.19083.9014.sendpatchset@magnum.int.mccr.org>

On 06/09/2010 10:20 AM, Dave McCracken wrote:
> Two new hypercalls were recently added to the Xen hypervisor to mark
> and unmark hugepages as in use.  Once a hugepage has been marked,
> actually mapping the hugepage is sped up by a significant amount (more
> than 20 times faster).
>
> This patch marks hugepages when they are added to the hugepage pool
> and unmarks them when they are removed.  It also checks to make sure
> each hugepage is contiguous at the machine level.
>
> Signed-off-by: Dave McCracken <dave.mccracken@oracle.com>
>
> --------
>
>
> --- next/arch/x86/xen/mmu.c	2010-06-01 11:41:54.000000000 -0500
> +++ next-fh//arch/x86/xen/mmu.c	2010-06-02 08:45:01.000000000 -0500
> @@ -2531,3 +2531,51 @@ static int __init xen_mmu_debugfs(void)
>  fs_initcall(xen_mmu_debugfs);
>  
>  #endif	/* CONFIG_XEN_DEBUG_FS */
> +
> +static int mark_ok = 1;
>   

This should be marked __read_mostly.

> +
> +int arch_prepare_hugepage(struct page *page)
> +{
> +	struct mmuext_op op;
> +	unsigned long pfn, mfn, m;
> +	int i;
> +	int rc;
> +
> +	pfn = page_to_pfn(page);
> +	mfn = pfn_to_mfn(pfn);
> +	if (mfn & ((HPAGE_SIZE/PAGE_SIZE)-1)) {
> +		printk("Guest pages are not properly aligned to use hugepages\n");
>   

Don't use printk to do this.  Use WARN() to print a stack trace so you
can work out who's doing this.

> +		return 1;
> +	}
> +	for (i = 0, m = mfn; i < HPAGE_SIZE/PAGE_SIZE; i++, pfn++, m++) {
> +		if (pfn_to_mfn(pfn) != m) {
> +			printk("Guest pages are not properly aligned to use hugepages\n");
>   
Isn't this a superset of the previous test?

> +			return 1;
> +		}
> +	}
> +	/* It's ok if this fails.  We just fall back to the slow refcounting */
> +	if (mark_ok) {
> +		op.cmd = MMUEXT_MARK_SUPER;
> +		op.arg1.mfn = mfn;
> +		rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
> +		if (rc) {
> +			if (rc == -ENOSYS) {
> +				mark_ok = 0;
> +				return 0;
> +			}
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +void arch_release_hugepage(struct page *page)
> +{
> +	struct mmuext_op op;
> +
> +	if (mark_ok) {
> +		op.cmd = MMUEXT_UNMARK_SUPER;
> +		op.arg1.mfn = pfn_to_mfn(page_to_pfn(page));
> +		HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
> +	}
> +}
> --- next/arch/x86/include/asm/hugetlb.h	2010-06-01 11:41:54.000000000 -0500
> +++ next-fh//arch/x86/include/asm/hugetlb.h	2010-06-02 08:37:58.000000000 -0500
> @@ -95,6 +95,10 @@ static inline int huge_ptep_set_access_f
>  	return changed;
>  }
>  
> +#ifdef CONFIG_XEN
> +int arch_prepare_hugepage(struct page *page);
> +void arch_release_hugepage(struct page *page);
> +#else
>  static inline int arch_prepare_hugepage(struct page *page)
>  {
>  	return 0;
> @@ -103,5 +107,6 @@ static inline int arch_prepare_hugepage(
>  static inline void arch_release_hugepage(struct page *page)
>  {
>  }
> +#endif
>   

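That won't work.  What if you boot a CONFIG_XEN kernel natively?  The
Xen versions would still be compiled in and called even though there is
no hypervisor to talk to.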

Something like:

/*
 * Xen-specific hugepage preparation hook.  With CONFIG_XEN the function is
 * only declared here and must be defined elsewhere; without CONFIG_XEN an
 * inline stub that always returns 0 takes its place, so callers need no
 * #ifdef of their own.
 */
#ifdef CONFIG_XEN
int xen_prepare_hugepage(struct page *page);
#else
static inline int xen_prepare_hugepage(struct page *page)
{
	return 0;
}
#endif

/*
 * Decide at run time, not build time, whether the Xen hook applies:
 * xen_prepare_hugepage() is called only when xen_pv_domain() is true;
 * in every other case the function reports 0.
 */
static inline int arch_prepare_hugepage(struct page *page)
{
	if (!xen_pv_domain())
		return 0;

	return xen_prepare_hugepage(page);
}

might work better.

    J

>  
>  #endif /* _ASM_X86_HUGETLB_H */
> --- next/include/xen/interface/xen.h	2010-06-01 11:41:59.000000000 -0500
> +++ next-fh//include/xen/interface/xen.h	2010-06-02 08:38:50.000000000 -0500
> @@ -167,6 +167,19 @@
>   * cmd: MMUEXT_SET_LDT
>   * linear_addr: Linear address of LDT base (NB. must be page-aligned).
>   * nr_ents: Number of entries in LDT.
> + *
> + * cmd: MMUEXT_CLEAR_PAGE
> + * mfn: Machine frame number to be cleared.
> + *
> + * cmd: MMUEXT_COPY_PAGE
> + * mfn: Machine frame number of the destination page.
> + * src_mfn: Machine frame number of the source page.
> + *
> + * cmd: MMUEXT_MARK_SUPER
> + * mfn: Machine frame number of head of superpage to be marked.
> + *
> + * cmd: MMUEXT_UNMARK_SUPER
> + * mfn: Machine frame number of head of superpage to be cleared.
>   */
>  #define MMUEXT_PIN_L1_TABLE      0
>  #define MMUEXT_PIN_L2_TABLE      1
> @@ -183,6 +196,10 @@
>  #define MMUEXT_FLUSH_CACHE      12
>  #define MMUEXT_SET_LDT          13
>  #define MMUEXT_NEW_USER_BASEPTR 15
> +#define MMUEXT_CLEAR_PAGE       16
> +#define MMUEXT_COPY_PAGE        17
> +#define MMUEXT_MARK_SUPER       19
> +#define MMUEXT_UNMARK_SUPER     20
>  
>  #ifndef __ASSEMBLY__
>  #include <linux/types.h>
>
>   

  reply	other threads:[~2010-06-09 18:04 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-09 17:20 [Linux PATCH] Mark hugepages to the hypervisor to speed up mapping (with signoff this time) Dave McCracken
2010-06-09 18:04 ` Jeremy Fitzhardinge [this message]
2010-06-09 18:20   ` Dave McCracken

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4C0FD790.3070603@goop.org \
    --to=jeremy@goop.org \
    --cc=dcm@mccr.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.