From: Keir Fraser <keir.fraser@eu.citrix.com>
To: Jan Beulich <jbeulich@novell.com>, xen-devel@lists.xensource.com
Subject: Re: [PATCH] x86: fix preemptable page type handling (v2)
Date: Thu, 30 Oct 2008 14:46:57 +0000 [thread overview]
Message-ID: <C52F7961.28970%keir.fraser@eu.citrix.com> (raw)
In-Reply-To: <4909B652.76E4.0078.0@novell.com>
Why does this patch not do what the new comment says and directly get_page()
when PGT_partial is set and put_page() when PGT_partial is cleared? Doing
get_page() across all __put_page() operations and then skipping the
put_page() in some cases, for example, seems inefficient and also makes the
code less clear. Why would you do it that way?
-- Keir
On 30/10/08 12:27, "Jan Beulich" <jbeulich@novell.com> wrote:
> - retain a page reference when PGT_partial is set on a page (and drop
> it when clearing that flag)
> - don't drop a page reference never acquired when freeing the page type
> of a page where the allocation of the type got preempted (and never
> completed)
> - don't acquire a page reference when allocating the page type of a
> page where freeing the type got preempted (and never completed, and
> hence didn't drop the respective reference)
>
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
>
> Index: 2008-10-27/xen/arch/x86/domain.c
> ===================================================================
> --- 2008-10-27.orig/xen/arch/x86/domain.c 2008-10-29 17:10:41.000000000 +0100
> +++ 2008-10-27/xen/arch/x86/domain.c 2008-10-29 17:11:51.000000000 +0100
> @@ -1683,18 +1683,24 @@ static int relinquish_memory(
> break;
> case -EINTR:
> page->u.inuse.type_info |= PGT_validated;
> + if ( x & PGT_partial )
> + put_page(page);
> put_page(page);
> ret = -EAGAIN;
> goto out;
> case -EAGAIN:
> page->u.inuse.type_info |= PGT_partial;
> - put_page(page);
> + if ( x & PGT_partial )
> + put_page(page);
> goto out;
> default:
> BUG();
> }
> if ( x & PGT_partial )
> + {
> page->u.inuse.type_info--;
> + put_page(page);
> + }
> break;
> }
> }
> Index: 2008-10-27/xen/arch/x86/mm.c
> ===================================================================
> --- 2008-10-27.orig/xen/arch/x86/mm.c 2008-10-28 14:53:16.000000000 +0100
> +++ 2008-10-27/xen/arch/x86/mm.c 2008-10-29 16:46:52.000000000 +0100
> @@ -566,19 +566,21 @@ static int get_page_from_pagenr(unsigned
> static int get_page_and_type_from_pagenr(unsigned long page_nr,
> unsigned long type,
> struct domain *d,
> + int partial,
> int preemptible)
> {
> struct page_info *page = mfn_to_page(page_nr);
> int rc;
>
> - if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
> + if ( likely(partial >= 0) &&
> + unlikely(!get_page_from_pagenr(page_nr, d)) )
> return -EINVAL;
>
> rc = (preemptible ?
> get_page_type_preemptible(page, type) :
> (get_page_type(page, type) ? 0 : -EINVAL));
>
> - if ( rc )
> + if ( unlikely(rc) && partial >= 0 )
> put_page(page);
>
> return rc;
> @@ -761,7 +763,7 @@ get_page_from_l2e(
> }
>
> rc = get_page_and_type_from_pagenr(
> - l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
> + l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
> if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
> rc = 0;
>
> @@ -772,7 +774,7 @@ get_page_from_l2e(
> define_get_linear_pagetable(l3);
> static int
> get_page_from_l3e(
> - l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
> + l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int
> preemptible)
> {
> int rc;
>
> @@ -786,7 +788,7 @@ get_page_from_l3e(
> }
>
> rc = get_page_and_type_from_pagenr(
> - l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
> + l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
> if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
> rc = 0;
>
> @@ -797,7 +799,7 @@ get_page_from_l3e(
> define_get_linear_pagetable(l4);
> static int
> get_page_from_l4e(
> - l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
> + l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int
> preemptible)
> {
> int rc;
>
> @@ -811,7 +813,7 @@ get_page_from_l4e(
> }
>
> rc = get_page_and_type_from_pagenr(
> - l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
> + l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
> if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
> rc = 0;
>
> @@ -961,23 +963,32 @@ static int put_page_from_l2e(l2_pgentry_
> return 1;
> }
>
> +static int __put_page_type(struct page_info *, int preemptible);
>
> static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
> - int preemptible)
> + int partial, int preemptible)
> {
> if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
> (l3e_get_pfn(l3e) != pfn) )
> + {
> + if ( unlikely(partial > 0) )
> + return __put_page_type(l3e_get_page(l3e), preemptible);
> return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
> + }
> return 1;
> }
>
> #if CONFIG_PAGING_LEVELS >= 4
> static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
> - int preemptible)
> + int partial, int preemptible)
> {
> if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
> (l4e_get_pfn(l4e) != pfn) )
> + {
> + if ( unlikely(partial > 0) )
> + return __put_page_type(l4e_get_page(l4e), preemptible);
> return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
> + }
> return 1;
> }
> #endif
> @@ -1184,7 +1195,7 @@ static int alloc_l3_table(struct page_in
> unsigned long pfn = page_to_mfn(page);
> l3_pgentry_t *pl3e;
> unsigned int i;
> - int rc = 0;
> + int rc = 0, partial = page->partial_pte;
>
> #if CONFIG_PAGING_LEVELS == 3
> /*
> @@ -1213,7 +1224,8 @@ static int alloc_l3_table(struct page_in
> if ( is_pv_32on64_domain(d) )
> memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
>
> - for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
> + for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
> + i++, partial = 0 )
> {
> if ( is_pv_32bit_domain(d) && (i == 3) )
> {
> @@ -1224,16 +1236,17 @@ static int alloc_l3_table(struct page_in
> rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
> PGT_l2_page_table |
> PGT_pae_xen_l2,
> - d, preemptible);
> + d, partial, preemptible);
> }
> else if ( !is_guest_l3_slot(i) ||
> - (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0
> )
> + (rc = get_page_from_l3e(pl3e[i], pfn, d,
> + partial, preemptible)) > 0 )
> continue;
>
> if ( rc == -EAGAIN )
> {
> page->nr_validated_ptes = i;
> - page->partial_pte = 1;
> + page->partial_pte = partial ?: 1;
> }
> else if ( rc == -EINTR && i )
> {
> @@ -1257,7 +1270,7 @@ static int alloc_l3_table(struct page_in
> if ( !is_guest_l3_slot(i) )
> continue;
> unadjust_guest_l3e(pl3e[i], d);
> - put_page_from_l3e(pl3e[i], pfn, 0);
> + put_page_from_l3e(pl3e[i], pfn, 0, 0);
> }
> }
>
> @@ -1272,18 +1285,20 @@ static int alloc_l4_table(struct page_in
> unsigned long pfn = page_to_mfn(page);
> l4_pgentry_t *pl4e = page_to_virt(page);
> unsigned int i;
> - int rc = 0;
> + int rc = 0, partial = page->partial_pte;
>
> - for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
> + for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
> + i++, partial = 0 )
> {
> if ( !is_guest_l4_slot(d, i) ||
> - (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
> + (rc = get_page_from_l4e(pl4e[i], pfn, d,
> + partial, preemptible)) > 0 )
> continue;
>
> if ( rc == -EAGAIN )
> {
> page->nr_validated_ptes = i;
> - page->partial_pte = 1;
> + page->partial_pte = partial ?: 1;
> }
> else if ( rc == -EINTR )
> {
> @@ -1299,7 +1314,7 @@ static int alloc_l4_table(struct page_in
> MEM_LOG("Failure in alloc_l4_table: entry %d", i);
> while ( i-- > 0 )
> if ( is_guest_l4_slot(d, i) )
> - put_page_from_l4e(pl4e[i], pfn, 0);
> + put_page_from_l4e(pl4e[i], pfn, 0, 0);
> }
> if ( rc < 0 )
> return rc;
> @@ -1377,19 +1392,20 @@ static int free_l3_table(struct page_inf
> struct domain *d = page_get_owner(page);
> unsigned long pfn = page_to_mfn(page);
> l3_pgentry_t *pl3e;
> - unsigned int i = page->nr_validated_ptes - !page->partial_pte;
> - int rc = 0;
> + int rc = 0, partial = page->partial_pte;
> + unsigned int i = page->nr_validated_ptes - !partial;
>
> pl3e = map_domain_page(pfn);
>
> do {
> if ( is_guest_l3_slot(i) )
> {
> - rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
> + rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
> + if ( rc < 0 )
> + break;
> + partial = 0;
> if ( rc > 0 )
> continue;
> - if ( rc )
> - break;
> unadjust_guest_l3e(pl3e[i], d);
> }
> } while ( i-- );
> @@ -1399,7 +1415,7 @@ static int free_l3_table(struct page_inf
> if ( rc == -EAGAIN )
> {
> page->nr_validated_ptes = i;
> - page->partial_pte = 1;
> + page->partial_pte = partial ?: -1;
> }
> else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
> {
> @@ -1416,18 +1432,21 @@ static int free_l4_table(struct page_inf
> struct domain *d = page_get_owner(page);
> unsigned long pfn = page_to_mfn(page);
> l4_pgentry_t *pl4e = page_to_virt(page);
> - unsigned int i = page->nr_validated_ptes - !page->partial_pte;
> - int rc = 0;
> + int rc = 0, partial = page->partial_pte;
> + unsigned int i = page->nr_validated_ptes - !partial;
>
> do {
> if ( is_guest_l4_slot(d, i) )
> - rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
> - } while ( rc >= 0 && i-- );
> + rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
> + if ( rc < 0 )
> + break;
> + partial = 0;
> + } while ( i-- );
>
> if ( rc == -EAGAIN )
> {
> page->nr_validated_ptes = i;
> - page->partial_pte = 1;
> + page->partial_pte = partial ?: -1;
> }
> else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
> {
> @@ -1703,7 +1722,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
> return rc ? 0 : -EFAULT;
> }
>
> - rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
> + rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
> if ( unlikely(rc < 0) )
> return page_unlock(l3pg), rc;
> rc = 0;
> @@ -1732,7 +1751,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
> }
>
> page_unlock(l3pg);
> - put_page_from_l3e(ol3e, pfn, 0);
> + put_page_from_l3e(ol3e, pfn, 0, 0);
> return rc;
> }
>
> @@ -1781,7 +1800,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
> return rc ? 0 : -EFAULT;
> }
>
> - rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
> + rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
> if ( unlikely(rc < 0) )
> return page_unlock(l4pg), rc;
> rc = 0;
> @@ -1802,7 +1821,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
> }
>
> page_unlock(l4pg);
> - put_page_from_l4e(ol4e, pfn, 0);
> + put_page_from_l4e(ol4e, pfn, 0, 0);
> return rc;
> }
>
> @@ -1866,6 +1885,10 @@ static int alloc_page_type(struct page_i
> struct domain *owner = page_get_owner(page);
> int rc;
>
> + /* Obtain an extra reference to retain if we set PGT_partial. */
> + if ( preemptible && !get_page(page, owner) )
> + return -EINVAL;
> +
> /* A page table is dirtied when its type count becomes non-zero. */
> if ( likely(owner != NULL) )
> paging_mark_dirty(owner, page_to_mfn(page));
> @@ -1900,8 +1923,13 @@ static int alloc_page_type(struct page_i
> if ( rc == -EAGAIN )
> {
> page->u.inuse.type_info |= PGT_partial;
> + return -EAGAIN;
> }
> - else if ( rc == -EINTR )
> +
> + if ( preemptible )
> + put_page(page);
> +
> + if ( rc == -EINTR )
> {
> ASSERT((page->u.inuse.type_info &
> (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
> @@ -2020,11 +2048,16 @@ static int __put_page_type_final(struct
> case -EAGAIN:
> wmb();
> page->u.inuse.type_info |= PGT_partial;
> + /* Must skip put_page() below. */
> + preemptible = 0;
> break;
> default:
> BUG();
> }
>
> + if ( preemptible )
> + put_page(page);
> +
> return rc;
> }
>
> @@ -2034,6 +2067,10 @@ static int __put_page_type(struct page_i
> {
> unsigned long nx, x, y = page->u.inuse.type_info;
>
> + /* Obtain an extra reference to retain if we set PGT_partial. */
> + if ( preemptible && !get_page(page, page_get_owner(page)) )
> + return -EINVAL;
> +
> for ( ; ; )
> {
> x = y;
> @@ -2055,6 +2092,8 @@ static int __put_page_type(struct page_i
> if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
> x, nx)) != x) )
> continue;
> + if ( x & PGT_partial )
> + put_page(page);
> /* We cleared the 'valid bit' so we do the clean up. */
> return __put_page_type_final(page, x, preemptible);
> }
> @@ -2075,9 +2114,16 @@ static int __put_page_type(struct page_i
> break;
>
> if ( preemptible && hypercall_preempt_check() )
> + {
> + if ( preemptible )
> + put_page(page);
> return -EINTR;
> + }
> }
>
> + if ( preemptible )
> + put_page(page);
> +
> return 0;
> }
>
> @@ -2181,7 +2227,11 @@ static int __get_page_type(struct page_i
> }
>
> if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
> + {
> + if ( (x & PGT_partial) && !(nx & PGT_partial) )
> + put_page(page);
> break;
> + }
>
> if ( preemptible && hypercall_preempt_check() )
> return -EINTR;
> @@ -2290,7 +2340,7 @@ int new_guest_cr3(unsigned long mfn)
> #endif
> okay = paging_mode_refcounts(d)
> ? get_page_from_pagenr(mfn, d)
> - : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
> + : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
> if ( unlikely(!okay) )
> {
> MEM_LOG("Error while installing new baseptr %lx", mfn);
> @@ -2534,7 +2584,7 @@ int do_mmuext_op(
> if ( paging_mode_refcounts(FOREIGNDOM) )
> break;
>
> - rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
> + rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 0, 1);
> okay = !rc;
> if ( unlikely(!okay) )
> {
> @@ -2615,7 +2665,7 @@ int do_mmuext_op(
> okay = get_page_from_pagenr(mfn, d);
> else
> okay = !get_page_and_type_from_pagenr(
> - mfn, PGT_root_page_table, d, 0);
> + mfn, PGT_root_page_table, d, 0, 0);
> if ( unlikely(!okay) )
> {
> MEM_LOG("Error while installing new mfn %lx", mfn);
> @@ -2722,7 +2772,7 @@ int do_mmuext_op(
> unsigned char *ptr;
>
> okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
> - FOREIGNDOM, 0);
> + FOREIGNDOM, 0, 0);
> if ( unlikely(!okay) )
> {
> MEM_LOG("Error while clearing mfn %lx", mfn);
> @@ -2755,7 +2805,7 @@ int do_mmuext_op(
> }
>
> okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
> - FOREIGNDOM, 0);
> + FOREIGNDOM, 0, 0);
> if ( unlikely(!okay) )
> {
> put_page(mfn_to_page(src_mfn));
> Index: 2008-10-27/xen/include/asm-x86/mm.h
> ===================================================================
> --- 2008-10-27.orig/xen/include/asm-x86/mm.h 2008-10-29 17:09:06.000000000
> +0100
> +++ 2008-10-27/xen/include/asm-x86/mm.h 2008-10-30 13:25:44.000000000 +0100
> @@ -61,12 +61,36 @@ struct page_info
> /*
> * When PGT_partial is true then this field is valid and indicates
> * that PTEs in the range [0, @nr_validated_ptes) have been
> validated.
> - * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
> - * partially validated.
> + * An extra page reference must be acquired (or not dropped) whenever
> + * PGT_partial gets set, and it must be dropped when the flag gets
> + * cleared. This is so that a get() leaving a page in partially
> + * validated state (where the caller would drop the reference
> acquired
> + * due to the getting of the type [apparently] failing [-EAGAIN])
> + * would not accidentally result in a page left with zero general
> + * reference count, but non-zero type reference count (possible when
> + * the partial get() is followed immediately by domain destruction).
> + * Likewise, the ownership of the single type reference for partially
> + * (in-)validated pages is tied to this flag, i.e. the instance
> + * setting the flag must not drop that reference, whereas the
> instance
> + * clearing it will have to.
> + *
> + * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
> + * been partially validated. This implies that the general reference
> + * to the page (acquired from get_page_from_lNe()) would be dropped
> + * (again due to the apparent failure) and hence must be re-acquired
> + * when resuming the validation, but must not be dropped when picking
> + * up the page for invalidation.
> + *
> + * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
> + * been partially invalidated. This is basically the opposite case of
> + * above, i.e. the general reference to the page was not dropped in
> + * put_page_from_lNe() (due to the apparent failure), and hence it
> + * must be dropped when the put operation is resumed (and completes),
> + * but it must not be acquired if picking up the page for validation.
> */
> struct {
> u16 nr_validated_ptes;
> - bool_t partial_pte;
> + s8 partial_pte;
> };
>
> /*
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
next prev parent reply other threads:[~2008-10-30 14:46 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-30 12:27 [PATCH] x86: fix preemptable page type handling (v2) Jan Beulich
2008-10-30 14:46 ` Keir Fraser [this message]
2008-10-30 14:56 ` [PATCH] x86: fix preemptable page type handling(v2) Jan Beulich
2008-10-30 15:00 ` Keir Fraser
2008-10-30 17:03 ` [PATCH] x86: fix preemptable page typehandling(v2) Jan Beulich
2008-10-30 17:27 ` Keir Fraser
2008-10-30 21:19 ` Use of the "has_shutdown_code" code in v3.2 Roger Cruz
2008-10-30 22:02 ` Keir Fraser
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=C52F7961.28970%keir.fraser@eu.citrix.com \
--to=keir.fraser@eu.citrix.com \
--cc=jbeulich@novell.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.