qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Palmer Dabbelt <palmer@sifive.com>
Cc: Richard Henderson <richard.henderson@linaro.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg
Date: Fri, 14 Dec 2018 11:05:08 +1100	[thread overview]
Message-ID: <c59aabb14c0b89257db6c136300df24d583977b3.camel@kernel.crashing.org> (raw)
In-Reply-To: <20181213235804.14956-2-benh@kernel.crashing.org>

Note to RiscV folks: You may want to adapt your code to do the same as
this, esp. afaik, what you do today is endian-broken if host and guest
endian are different.

Cheers,
Ben. 

On Fri, 2018-12-14 at 10:58 +1100, Benjamin Herrenschmidt wrote:
> Afaik, this isn't well documented (at least it wasn't when I last looked)
> but OSes such as Linux rely on this behaviour:
> 
> The HW updates to the page tables need to be done atomically with the
> checking of the present bit (and other permissions).
> 
> This is what allows Linux to do simple xchg of PTEs with 0 and assume
> the value read has "final" stable dirty and accessed bits (the TLB
> invalidation is deferred).
> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>  target/i386/excp_helper.c | 104 +++++++++++++++++++++++++++++---------
>  1 file changed, 80 insertions(+), 24 deletions(-)
> 
> diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
> index 49231f6b69..93fc24c011 100644
> --- a/target/i386/excp_helper.c
> +++ b/target/i386/excp_helper.c
> @@ -157,11 +157,45 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>  
>  #else
>  
> +static inline uint64_t update_entry(CPUState *cs, target_ulong addr,
> +                                    uint64_t orig_entry, uint32_t bits)
> +{
> +    uint64_t new_entry = orig_entry | bits;
> +
> +    /* Write the updated bottom 32-bits */
> +    if (qemu_tcg_mttcg_enabled()) {
> +        uint32_t old_le = cpu_to_le32(orig_entry);
> +        uint32_t new_le = cpu_to_le32(new_entry);
> +        MemTxResult result;
> +        uint32_t old_ret;
> +
> +        old_ret = address_space_cmpxchgl_notdirty(cs->as, addr,
> +                                                  old_le, new_le,
> +                                                  MEMTXATTRS_UNSPECIFIED,
> +                                                  &result);
> +        if (result == MEMTX_OK) {
> +            if (old_ret != old_le && old_ret != new_le) {
> +                new_entry = 0;
> +            }
> +            return new_entry;
> +        }
> +
> +        /* Do we need to support this case where PTEs aren't in RAM ?
> +         *
> +         * For now fallback to non-atomic case
> +         */
> +    }
> +
> +    x86_stl_phys_notdirty(cs, addr, new_entry);
> +
> +    return new_entry;
> +}
> +
>  static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>                          int *prot)
>  {
>      CPUX86State *env = &X86_CPU(cs)->env;
> -    uint64_t rsvd_mask = PG_HI_RSVD_MASK;
> +    uint64_t rsvd_mask;
>      uint64_t ptep, pte;
>      uint64_t exit_info_1 = 0;
>      target_ulong pde_addr, pte_addr;
> @@ -172,6 +206,8 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>          return gphys;
>      }
>  
> + restart:
> +    rsvd_mask = PG_HI_RSVD_MASK;
>      if (!(env->nested_pg_mode & SVM_NPT_NXE)) {
>          rsvd_mask |= PG_NX_MASK;
>      }
> @@ -198,8 +234,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>                  goto do_fault_rsvd;
>              }
>              if (!(pml4e & PG_ACCESSED_MASK)) {
> -                pml4e |= PG_ACCESSED_MASK;
> -                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
> +                pml4e = update_entry(cs, pml4e_addr, pml4e, PG_ACCESSED_MASK);
> +                if (!pml4e) {
> +                    goto restart;
> +                }
>              }
>              ptep &= pml4e ^ PG_NX_MASK;
>              pdpe_addr = (pml4e & PG_ADDRESS_MASK) +
> @@ -213,8 +251,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>              }
>              ptep &= pdpe ^ PG_NX_MASK;
>              if (!(pdpe & PG_ACCESSED_MASK)) {
> -                pdpe |= PG_ACCESSED_MASK;
> -                x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
> +                pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
> +                if (!pdpe) {
> +                    goto restart;
> +                }
>              }
>              if (pdpe & PG_PSE_MASK) {
>                  /* 1 GB page */
> @@ -256,8 +296,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>          }
>          /* 4 KB page */
>          if (!(pde & PG_ACCESSED_MASK)) {
> -            pde |= PG_ACCESSED_MASK;
> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
> +            if (!pde) {
> +                goto restart;
> +            }
>          }
>          pte_addr = (pde & PG_ADDRESS_MASK) + (((gphys >> 12) & 0x1ff) << 3);
>          pte = x86_ldq_phys(cs, pte_addr);
> @@ -295,8 +337,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>          }
>  
>          if (!(pde & PG_ACCESSED_MASK)) {
> -            pde |= PG_ACCESSED_MASK;
> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
> +            if (!pde) {
> +                goto restart;
> +            }
>          }
>  
>          /* page directory entry */
> @@ -376,7 +420,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>      int error_code = 0;
>      int is_dirty, prot, page_size, is_write, is_user;
>      hwaddr paddr;
> -    uint64_t rsvd_mask = PG_HI_RSVD_MASK;
> +    uint64_t rsvd_mask;
>      uint32_t page_offset;
>      target_ulong vaddr;
>  
> @@ -401,6 +445,8 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>          goto do_mapping;
>      }
>  
> + restart:
> +    rsvd_mask = PG_HI_RSVD_MASK;
>      if (!(env->efer & MSR_EFER_NXE)) {
>          rsvd_mask |= PG_NX_MASK;
>      }
> @@ -436,8 +482,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>                      goto do_fault_rsvd;
>                  }
>                  if (!(pml5e & PG_ACCESSED_MASK)) {
> -                    pml5e |= PG_ACCESSED_MASK;
> -                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
> +                    pml5e = update_entry(cs, pml5e_addr, pml5e, PG_ACCESSED_MASK);
> +                    if (!pml5e) {
> +                        goto restart;
> +                    }
>                  }
>                  ptep = pml5e ^ PG_NX_MASK;
>              } else {
> @@ -456,8 +504,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>                  goto do_fault_rsvd;
>              }
>              if (!(pml4e & PG_ACCESSED_MASK)) {
> -                pml4e |= PG_ACCESSED_MASK;
> -                x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
> +                pml4e = update_entry(cs, pml4e_addr, pml4e, PG_ACCESSED_MASK);
> +                if (!pml4e) {
> +                    goto restart;
> +                }
>              }
>              ptep &= pml4e ^ PG_NX_MASK;
>              pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
> @@ -472,8 +522,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>              }
>              ptep &= pdpe ^ PG_NX_MASK;
>              if (!(pdpe & PG_ACCESSED_MASK)) {
> -                pdpe |= PG_ACCESSED_MASK;
> -                x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
> +                pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
> +                if (!pdpe) {
> +                    goto restart;
> +                }
>              }
>              if (pdpe & PG_PSE_MASK) {
>                  /* 1 GB page */
> @@ -520,8 +572,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>          }
>          /* 4 KB page */
>          if (!(pde & PG_ACCESSED_MASK)) {
> -            pde |= PG_ACCESSED_MASK;
> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
> +            if (!pde) {
> +                goto restart;
> +            }
>          }
>          pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
>              a20_mask;
> @@ -563,8 +617,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>          }
>  
>          if (!(pde & PG_ACCESSED_MASK)) {
> -            pde |= PG_ACCESSED_MASK;
> -            x86_stl_phys_notdirty(cs, pde_addr, pde);
> +            pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
> +            if (!pde) {
> +                goto restart;
> +            }
>          }
>  
>          /* page directory entry */
> @@ -634,11 +690,11 @@ do_check_protect_pse36:
>      /* yes, it can! */
>      is_dirty = is_write && !(pte & PG_DIRTY_MASK);
>      if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
> -        pte |= PG_ACCESSED_MASK;
> -        if (is_dirty) {
> -            pte |= PG_DIRTY_MASK;
> +        pte = update_entry(cs, pte_addr, pte,
> +                           PG_ACCESSED_MASK | (is_dirty ? PG_DIRTY_MASK : 0));
> +        if (!pte) {
> +            goto restart;
>          }
> -        x86_stl_phys_notdirty(cs, pte_addr, pte);
>      }
>  
>      if (!(pte & PG_DIRTY_MASK)) {

  reply	other threads:[~2018-12-14  0:05 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-13 23:58 [Qemu-devel] [PATCH 1/3] memory_ldst: Add atomic ops for PTE updates Benjamin Herrenschmidt
2018-12-13 23:58 ` [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg Benjamin Herrenschmidt
2018-12-14  0:05   ` Benjamin Herrenschmidt [this message]
2018-12-14 11:11     ` Philippe Mathieu-Daudé
2018-12-13 23:58 ` [Qemu-devel] [PATCH 3/3] ppc: Fix radix RC updates Benjamin Herrenschmidt
2018-12-14  0:03   ` Benjamin Herrenschmidt
2018-12-14  3:01 ` [Qemu-devel] [PATCH 1/3] memory_ldst: Add atomic ops for PTE updates Richard Henderson
2018-12-14  3:54   ` Benjamin Herrenschmidt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c59aabb14c0b89257db6c136300df24d583977b3.camel@kernel.crashing.org \
    --to=benh@kernel.crashing.org \
    --cc=mjc@sifive.com \
    --cc=palmer@sifive.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).