From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
xen-devel <xen-devel@lists.xenproject.org>
Cc: Keir Fraser <keir@xen.org>
Subject: Re: [PATCH] x86/PV: fix unintended dependency of m2p-strict mode on migration-v2
Date: Tue, 12 Jan 2016 11:55:43 +0000 [thread overview]
Message-ID: <5694E9BF.8090005@citrix.com> (raw)
In-Reply-To: <5694DEAA02000078000C5D7A@prv-mh.provo.novell.com>
On 12/01/16 10:08, Jan Beulich wrote:
> This went unnoticed until a backport of this to an older Xen got used,
> causing migration of guests enabling this VM assist to fail, because
> page table pinning there precedes vCPU context loading, and hence L4
> tables get initialized for the wrong mode. Fix this by post-processing
> L4 tables when setting the intended VM assist flags for the guest.
>
> Note that this leaves in place a dependency on vCPU 0 getting its guest
> context restored first, but afaict the logic here is not the only thing
> depending on that.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1067,8 +1067,48 @@ int arch_set_info_guest(
> goto out;
>
> if ( v->vcpu_id == 0 )
> + {
> d->vm_assist = c(vm_assist);
>
> + /*
> + * In the restore case we need to deal with L4 pages which got
> + * initialized with m2p_strict still clear (and which hence lack the
> + * correct initial RO_MPT_VIRT_{START,END} L4 entry).
> + */
> + if ( d != current->domain && VM_ASSIST(d, m2p_strict) &&
> + is_pv_domain(d) && !is_pv_32bit_domain(d) &&
> + atomic_read(&d->arch.pv_domain.nr_l4_pages) )
> + {
> + bool_t done = 0;
> +
> + spin_lock_recursive(&d->page_alloc_lock);
> +
> + for ( i = 0; ; )
> + {
> + struct page_info *page = page_list_remove_head(&d->page_list);
> +
> + if ( page_lock(page) )
> + {
> + if ( (page->u.inuse.type_info & PGT_type_mask) ==
> + PGT_l4_page_table )
> + done = !fill_ro_mpt(page_to_mfn(page));
> +
> + page_unlock(page);
> + }
> +
> + page_list_add_tail(page, &d->page_list);
> +
> + if ( done || (!(++i & 0xff) && hypercall_preempt_check()) )
> + break;
> + }
> +
> + spin_unlock_recursive(&d->page_alloc_lock);
> +
> + if ( !done )
> + return -ERESTART;
This is a long loop. It is preemptible, but will incur a time delay
proportional to the size of the domain during the VM downtime.
Could you defer the loop until after %cr3 has been set up, and only
enter the loop if the kernel l4 table is missing the RO mappings? That
way, domains migrated with migration v2 will skip the loop entirely.
> + }
> + }
> +
> rc = put_old_guest_table(current);
> if ( rc )
> return rc;
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -1463,13 +1463,20 @@ void init_guest_l4_table(l4_pgentry_t l4
> l4tab[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty();
> }
>
> -void fill_ro_mpt(unsigned long mfn)
> +bool_t fill_ro_mpt(unsigned long mfn)
> {
> l4_pgentry_t *l4tab = map_domain_page(_mfn(mfn));
> + bool_t ret = 0;
>
> - l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
> - idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
> + if ( !l4e_get_intpte(l4tab[l4_table_offset(RO_MPT_VIRT_START)]) )
> + {
> + l4tab[l4_table_offset(RO_MPT_VIRT_START)] =
> + idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)];
> + ret = 1;
This is a behavioural change. Previously, the old value was clobbered.
It appears that you are now using this return value to indicate when the
entire pagelist has been walked, but it relies on the slots being
zero, which is a fragile assumption.
~Andrew
> + }
> unmap_domain_page(l4tab);
> +
> + return ret;
> }
>
> void zap_ro_mpt(unsigned long mfn)
> @@ -1527,10 +1534,15 @@ static int alloc_l4_table(struct page_in
> adjust_guest_l4e(pl4e[i], d);
> }
>
> - init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
> + if ( rc >= 0 )
> + {
> + init_guest_l4_table(pl4e, d, !VM_ASSIST(d, m2p_strict));
> + atomic_inc(&d->arch.pv_domain.nr_l4_pages);
> + rc = 0;
> + }
> unmap_domain_page(pl4e);
>
> - return rc > 0 ? 0 : rc;
> + return rc;
> }
>
> static void free_l1_table(struct page_info *page)
> @@ -1648,7 +1660,13 @@ static int free_l4_table(struct page_inf
>
> unmap_domain_page(pl4e);
>
> - return rc > 0 ? 0 : rc;
> + if ( rc >= 0 )
> + {
> + atomic_dec(&d->arch.pv_domain.nr_l4_pages);
> + rc = 0;
> + }
> +
> + return rc;
> }
>
> int page_lock(struct page_info *page)
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -248,6 +248,8 @@ struct pv_domain
> {
> l1_pgentry_t **gdt_ldt_l1tab;
>
> + atomic_t nr_l4_pages;
> +
> /* map_domain_page() mapping cache. */
> struct mapcache_domain mapcache;
> };
> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -322,7 +322,7 @@ int free_page_type(struct page_info *pag
>
> void init_guest_l4_table(l4_pgentry_t[], const struct domain *,
> bool_t zap_ro_mpt);
> -void fill_ro_mpt(unsigned long mfn);
> +bool_t fill_ro_mpt(unsigned long mfn);
> void zap_ro_mpt(unsigned long mfn);
>
> int is_iomem_page(unsigned long mfn);
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel