All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Malcolm Crossley <malcolm.crossley@citrix.com>, xen-devel@lists.xen.org
Cc: stefano.stabellini@citrix.com, keir@xen.org,
	ian.campbell@citrix.com, jbeulich@suse.com, tim@xen.org
Subject: Re: [PATCH RFC] xen/gnttab: Avoid TLB flush if the grant mapped page was not accessed
Date: Fri, 4 Jul 2014 14:21:31 +0100	[thread overview]
Message-ID: <53B6AA5B.3000607@citrix.com> (raw)
In-Reply-To: <38f32002937183678ae4.1404478475@malcolmc.uk.xensource.com>

On 04/07/14 13:54, Malcolm Crossley wrote:
> Implement an x86 specific optimisation to avoid a TLB flush if the grant
> mapped page was not accessed by a CPU whilst it was mapped.
>
> The optimisation works by checking the accessed page bit of the pte to
> determine if the page has been accessed by a cpu. The x86 specification details
> that the processor atomically sets the accessed bit before caching a PTE into
> a TLB.
>
> The grant mapped pte is now setup without the accessed or dirty bit's set and
> so we can determine if any CPU has accessed that page via the grant mapping
> pte.
>
> I believe Xen drops all attempts by guest to clear the accessed or dirty bit's
> on the pte because Xen is the pte owner. This should prevent a malicous guest
> from delibrately circumventing the TLB flush.
>
> Blkback benefits from this optimistation because it does not access the data
> itself from the CPU.
>
> Netback can benefit from the optimistation depending on how much of the header
> is put into the first fragment because the first fragment is grant copied on
> recent netback implementations.
>
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>

Probably worth a comment regarding the improvements we have observed. 
100% reduction of TLB flushes on the blk path and between 80~90%
reduction on the net path.

>
> diff -r 3e0ee081c616 -r 38f320029371 xen/arch/arm/mm.c
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -1216,7 +1216,7 @@ int create_grant_host_mapping(unsigned l
>  }
>  
>  int replace_grant_host_mapping(unsigned long addr, unsigned long mfn,
> -        unsigned long new_addr, unsigned int flags)
> +        unsigned long new_addr, unsigned int flags, int *page_accessed)

pragmatically, this should be a bool_t rather than an int.

>  {
>      unsigned long gfn = (unsigned long)(addr >> PAGE_SHIFT);
>      struct domain *d = current->domain;
> @@ -1225,6 +1225,7 @@ int replace_grant_host_mapping(unsigned 
>          return GNTST_general_error;
>  
>      guest_physmap_remove_page(d, gfn, mfn, 0);
> +    *page_accessed = 1;
>  
>      return GNTST_okay;
>  }
> diff -r 3e0ee081c616 -r 38f320029371 xen/arch/x86/mm.c
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -3901,7 +3901,8 @@ static int create_grant_va_mapping(
>  }
>  
>  static int replace_grant_va_mapping(
> -    unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v)
> +    unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v,
> +    int *page_accessed)
>  {
>      l1_pgentry_t *pl1e, ol1e;
>      unsigned long gl1mfn;
> @@ -3947,12 +3948,13 @@ static int replace_grant_va_mapping(
>      }
>  
>      /* Delete pagetable entry. */
> -    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
> +    if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 1)) )
>      {
>          MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
>          rc = GNTST_general_error;
>          goto unlock_and_out;
>      }
> +    *page_accessed = !!(l1e_get_flags(*pl1e) & _PAGE_ACCESSED);
>  
>   unlock_and_out:
>      page_unlock(l1pg);
> @@ -3963,9 +3965,9 @@ static int replace_grant_va_mapping(
>  }
>  
>  static int destroy_grant_va_mapping(
> -    unsigned long addr, unsigned long frame, struct vcpu *v)
> +    unsigned long addr, unsigned long frame, struct vcpu *v, int *page_accessed)
>  {
> -    return replace_grant_va_mapping(addr, frame, l1e_empty(), v);
> +    return replace_grant_va_mapping(addr, frame, l1e_empty(), v, page_accessed);
>  }
>  
>  static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame,
> @@ -4000,7 +4002,7 @@ int create_grant_host_mapping(uint64_t a
>          return create_grant_p2m_mapping(addr, frame, flags, cache_flags);
>  
>      grant_pte_flags =
> -        _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB;
> +        _PAGE_PRESENT | _PAGE_GNTTAB;
>      if ( cpu_has_nx )
>          grant_pte_flags |= _PAGE_NX_BIT;
>  
> @@ -4048,7 +4050,8 @@ static int replace_grant_p2m_mapping(
>  }
>  
>  int replace_grant_host_mapping(
> -    uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int flags)
> +    uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int flags,
> +    int *page_accessed)
>  {
>      struct vcpu *curr = current;
>      l1_pgentry_t *pl1e, ol1e;
> @@ -4069,7 +4072,7 @@ int replace_grant_host_mapping(
>      }
>  
>      if ( !new_addr )
> -        return destroy_grant_va_mapping(addr, frame, curr);
> +        return destroy_grant_va_mapping(addr, frame, curr, page_accessed);
>  
>      pl1e = guest_map_l1e(curr, new_addr, &gl1mfn);
>      if ( !pl1e )
> @@ -4117,7 +4120,7 @@ int replace_grant_host_mapping(
>      put_page(l1pg);
>      guest_unmap_l1e(curr, pl1e);
>  
> -    rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
> +    rc = replace_grant_va_mapping(addr, frame, ol1e, curr, page_accessed);
>      if ( rc && !paging_mode_refcounts(curr->domain) )
>          put_page_from_l1e(ol1e, curr->domain);
>  
> diff -r 3e0ee081c616 -r 38f320029371 xen/common/grant_table.c
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -73,6 +73,7 @@ struct gnttab_unmap_common {
>  
>      /* Return */
>      int16_t status;
> +    int page_accessed;
>  
>      /* Shared state beteen *_unmap and *_unmap_complete */
>      u16 flags;
> @@ -81,6 +82,12 @@ struct gnttab_unmap_common {
>      struct domain *rd;
>  };
>  
> +#ifndef NDEBUG
> +atomic_t grant_unmap_tlb_flush_avoided;
> +atomic_t grant_unmap_tlb_flush_done;
> +atomic_t grant_unmap_operations;
> +#endif

While these were useful during development, they would not be useful if
the patch were committed to xen.  I suggest they be dropped, and
possibly provided as second "for people interested in the savings they
are getting" patch.

> +
>  /* Number of unmap operations that are done between each tlb flush */
>  #define GNTTAB_UNMAP_BATCH_SIZE 32
>  
> @@ -844,6 +851,7 @@ static void
>      ld = current->domain;
>      lgt = ld->grant_table;
>  
> +    op->page_accessed = 0;
>      op->frame = (unsigned long)(op->dev_bus_addr >> PAGE_SHIFT);
>  
>      if ( unlikely(op->handle >= lgt->maptrack_limit) )
> @@ -924,7 +932,7 @@ static void
>      {
>          if ( (rc = replace_grant_host_mapping(op->host_addr,
>                                                op->frame, op->new_addr, 
> -                                              op->flags)) < 0 )
> +                                              op->flags, &op->page_accessed)) < 0 )
>              goto unmap_out;
>  
>          ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
> @@ -1082,20 +1090,25 @@ static long
>  gnttab_unmap_grant_ref(
>      XEN_GUEST_HANDLE_PARAM(gnttab_unmap_grant_ref_t) uop, unsigned int count)
>  {
> -    int i, c, partial_done, done = 0;
> +    int i, c, partial_done, done = 0, grant_map_accessed = 0;
>      struct gnttab_unmap_grant_ref op;
>      struct gnttab_unmap_common common[GNTTAB_UNMAP_BATCH_SIZE];
>  
> +#ifndef NDEBUG
> +    atomic_inc(&grant_unmap_operations);
> +#endif
>      while ( count != 0 )
>      {
>          c = min(count, (unsigned int)GNTTAB_UNMAP_BATCH_SIZE);
>          partial_done = 0;
> +        grant_map_accessed = 0;
>  
>          for ( i = 0; i < c; i++ )
>          {
>              if ( unlikely(__copy_from_guest(&op, uop, 1)) )
>                  goto fault;
>              __gnttab_unmap_grant_ref(&op, &(common[i]));
> +            grant_map_accessed |= common[i].page_accessed;
>              ++partial_done;
>              if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
>                  goto fault;
> @@ -1103,6 +1116,14 @@ gnttab_unmap_grant_ref(
>          }
>  
>          gnttab_flush_tlb(current->domain);
> +        if ( grant_map_accessed ) {

Coding style.

~Andrew

  reply	other threads:[~2014-07-04 13:21 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-04 12:54 [PATCH RFC] xen/gnttab: Avoid TLB flush if the grant mapped page was not accessed Malcolm Crossley
2014-07-04 13:21 ` Andrew Cooper [this message]
2014-07-04 13:28 ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53B6AA5B.3000607@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=ian.campbell@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=keir@xen.org \
    --cc=malcolm.crossley@citrix.com \
    --cc=stefano.stabellini@citrix.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.