From: "Philippe Mathieu-Daudé" <philmd@redhat.com>
To: Michael Clark <mjc@sifive.com>, Palmer Dabbelt <palmer@sifive.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>,
Richard Henderson <richard.henderson@linaro.org>,
qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg
Date: Fri, 14 Dec 2018 12:11:51 +0100
Message-ID: <926d9f34-c615-e4a7-b0d7-04b77d215540@redhat.com>
In-Reply-To: <c59aabb14c0b89257db6c136300df24d583977b3.camel@kernel.crashing.org>
On 12/14/18 1:05 AM, Benjamin Herrenschmidt wrote:
> Note to RISC-V folks: you may want to adapt your code to do the same as
> this, especially since, AFAIK, what you do today is endian-broken if host
> and guest endianness differ.
Cc'ing the RISC-V list.
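
For reference, here is roughly what "do the same as this" could look like
on the RISC-V side. This is only a sketch: the helper name and the Sv32
(32-bit PTE) focus are mine, it reuses address_space_cmpxchgl_notdirty()
from patch 1/3, and it leaves out the mttcg check and the non-RAM fallback
that the x86 helper in the patch below still has. Going through the PTE's
in-memory little-endian layout explicitly is what keeps it correct on a
big-endian host.

/* Hypothetical sketch, not part of this series: atomically set A/D bits
 * in a 32-bit RISC-V PTE living in guest RAM, mirroring the x86 pattern
 * below.  Returns 0 so the caller can restart the walk on failure. */
static uint32_t riscv_pte_set_bits(CPUState *cs, hwaddr pte_addr,
                                   uint32_t pte, uint32_t bits)
{
    uint32_t old_le = cpu_to_le32(pte);
    uint32_t new_le = cpu_to_le32(pte | bits);
    MemTxResult result;
    uint32_t old_ret;

    old_ret = address_space_cmpxchgl_notdirty(cs->as, pte_addr,
                                              old_le, new_le,
                                              MEMTXATTRS_UNSPECIFIED,
                                              &result);
    if (result != MEMTX_OK || (old_ret != old_le && old_ret != new_le)) {
        /* Raced with another update, or the PTE is not backed by plain
         * RAM; real code would have to decide how to handle the latter,
         * as the x86 helper's comment below already asks. */
        return 0;
    }
    return pte | bits;
}
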
> Cheers,
> Ben.
>
> On Fri, 2018-12-14 at 10:58 +1100, Benjamin Herrenschmidt wrote:
>> Afaik, this isn't well documented (at least it wasn't when I last looked)
>> but OSes such as Linux rely on this behaviour:
>>
>> The HW updates to the page tables need to be done atomically with the
>> checking of the present bit (and other permissions).
>>
>> This is what allows Linux to do simple xchg of PTEs with 0 and assume
>> the value read has "final" stable dirty and accessed bits (the TLB
>> invalidation is deferred).
>>
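
To spell out the race for anyone adapting another target: a walker that
does a plain read-modify-write of the entry can re-install a PTE the guest
has just atomically cleared, and the guest then misses the final
accessed/dirty state it believes it already read. A generic illustration
of the required pattern, in plain C11 atomics rather than QEMU API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Generic illustration, not QEMU code: set 'bits' in a PTE only if it
 * still holds the value the permission checks were based on; otherwise
 * tell the caller to restart the walk with a fresh entry.  A plain
 *     *pte_slot = expected | bits;
 * store is the racy variant this patch removes: if the guest did
 * xchg(pte, 0) in between, that blind store would resurrect the mapping
 * and the guest would never see the final accessed/dirty bits. */
static bool pte_set_bits(_Atomic uint64_t *pte_slot,
                         uint64_t *entry, uint64_t bits)
{
    uint64_t expected = *entry;
    uint64_t updated = expected | bits;

    if (!atomic_compare_exchange_strong(pte_slot, &expected, updated)) {
        return false;   /* entry changed under us: restart the walk */
    }
    *entry = updated;
    return true;
}
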
>> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> ---
>> target/i386/excp_helper.c | 104 +++++++++++++++++++++++++++++---------
>> 1 file changed, 80 insertions(+), 24 deletions(-)
>>
>> diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
>> index 49231f6b69..93fc24c011 100644
>> --- a/target/i386/excp_helper.c
>> +++ b/target/i386/excp_helper.c
>> @@ -157,11 +157,45 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>>
>> #else
>>
>> +static inline uint64_t update_entry(CPUState *cs, target_ulong addr,
>> + uint64_t orig_entry, uint32_t bits)
>> +{
>> + uint64_t new_entry = orig_entry | bits;
>> +
>> + /* Write the updated bottom 32-bits */
>> + if (qemu_tcg_mttcg_enabled()) {
>> + uint32_t old_le = cpu_to_le32(orig_entry);
>> + uint32_t new_le = cpu_to_le32(new_entry);
>> + MemTxResult result;
>> + uint32_t old_ret;
>> +
>> + old_ret = address_space_cmpxchgl_notdirty(cs->as, addr,
>> + old_le, new_le,
>> + MEMTXATTRS_UNSPECIFIED,
>> + &result);
>> + if (result == MEMTX_OK) {
>> + if (old_ret != old_le && old_ret != new_le) {
>> + new_entry = 0;
>> + }
>> + return new_entry;
>> + }
>> +
>> + /* Do we need to support the case where PTEs aren't in RAM?
>> + *
>> + * For now, fall back to the non-atomic case.
>> + */
>> + }
>> +
>> + x86_stl_phys_notdirty(cs, addr, new_entry);
>> +
>> + return new_entry;
>> +}
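
Side note for readers following along: as far as I can tell, returning 0
is a safe failure marker here because every call site has already checked
PG_PRESENT_MASK, so a legitimately updated entry can never be zero. Each
level of the walk then follows the same shape, distilled from the hunks
below:

    if (!(pml4e & PG_ACCESSED_MASK)) {
        pml4e = update_entry(cs, pml4e_addr, pml4e, PG_ACCESSED_MASK);
        if (!pml4e) {
            goto restart;   /* the entry changed under us: re-walk from
                               the top with everything reloaded */
        }
    }
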
>> +
>> static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> int *prot)
>> {
>> CPUX86State *env = &X86_CPU(cs)->env;
>> - uint64_t rsvd_mask = PG_HI_RSVD_MASK;
>> + uint64_t rsvd_mask;
>> uint64_t ptep, pte;
>> uint64_t exit_info_1 = 0;
>> target_ulong pde_addr, pte_addr;
>> @@ -172,6 +206,8 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> return gphys;
>> }
>>
>> + restart:
>> + rsvd_mask = PG_HI_RSVD_MASK;
>> if (!(env->nested_pg_mode & SVM_NPT_NXE)) {
>> rsvd_mask |= PG_NX_MASK;
>> }
>> @@ -198,8 +234,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> goto do_fault_rsvd;
>> }
>> if (!(pml4e & PG_ACCESSED_MASK)) {
>> - pml4e |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>> + pml4e = update_entry(cs, pml4e_addr, pml4e, PG_ACCESSED_MASK);
>> + if (!pml4e) {
>> + goto restart;
>> + }
>> }
>> ptep &= pml4e ^ PG_NX_MASK;
>> pdpe_addr = (pml4e & PG_ADDRESS_MASK) +
>> @@ -213,8 +251,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> }
>> ptep &= pdpe ^ PG_NX_MASK;
>> if (!(pdpe & PG_ACCESSED_MASK)) {
>> - pdpe |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
>> + pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
>> + if (!pdpe) {
>> + goto restart;
>> + }
>> }
>> if (pdpe & PG_PSE_MASK) {
>> /* 1 GB page */
>> @@ -256,8 +296,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> }
>> /* 4 KB page */
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>> pte_addr = (pde & PG_ADDRESS_MASK) + (((gphys >> 12) & 0x1ff) << 3);
>> pte = x86_ldq_phys(cs, pte_addr);
>> @@ -295,8 +337,10 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
>> }
>>
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>>
>> /* page directory entry */
>> @@ -376,7 +420,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> int error_code = 0;
>> int is_dirty, prot, page_size, is_write, is_user;
>> hwaddr paddr;
>> - uint64_t rsvd_mask = PG_HI_RSVD_MASK;
>> + uint64_t rsvd_mask;
>> uint32_t page_offset;
>> target_ulong vaddr;
>>
>> @@ -401,6 +445,8 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> goto do_mapping;
>> }
>>
>> + restart:
>> + rsvd_mask = PG_HI_RSVD_MASK;
>> if (!(env->efer & MSR_EFER_NXE)) {
>> rsvd_mask |= PG_NX_MASK;
>> }
>> @@ -436,8 +482,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> goto do_fault_rsvd;
>> }
>> if (!(pml5e & PG_ACCESSED_MASK)) {
>> - pml5e |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
>> + pml5e = update_entry(cs, pml5e_addr, pml5e, PG_ACCESSED_MASK);
>> + if (!pml5e) {
>> + goto restart;
>> + }
>> }
>> ptep = pml5e ^ PG_NX_MASK;
>> } else {
>> @@ -456,8 +504,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> goto do_fault_rsvd;
>> }
>> if (!(pml4e & PG_ACCESSED_MASK)) {
>> - pml4e |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
>> + pml4e = update_entry(cs, pml4e_addr, pml4e, PG_ACCESSED_MASK);
>> + if (!pml4e) {
>> + goto restart;
>> + }
>> }
>> ptep &= pml4e ^ PG_NX_MASK;
>> pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
>> @@ -472,8 +522,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> }
>> ptep &= pdpe ^ PG_NX_MASK;
>> if (!(pdpe & PG_ACCESSED_MASK)) {
>> - pdpe |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
>> + pdpe = update_entry(cs, pdpe_addr, pdpe, PG_ACCESSED_MASK);
>> + if (!pdpe) {
>> + goto restart;
>> + }
>> }
>> if (pdpe & PG_PSE_MASK) {
>> /* 1 GB page */
>> @@ -520,8 +572,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> }
>> /* 4 KB page */
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>> pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
>> a20_mask;
>> @@ -563,8 +617,10 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size,
>> }
>>
>> if (!(pde & PG_ACCESSED_MASK)) {
>> - pde |= PG_ACCESSED_MASK;
>> - x86_stl_phys_notdirty(cs, pde_addr, pde);
>> + pde = update_entry(cs, pde_addr, pde, PG_ACCESSED_MASK);
>> + if (!pde) {
>> + goto restart;
>> + }
>> }
>>
>> /* page directory entry */
>> @@ -634,11 +690,11 @@ do_check_protect_pse36:
>> /* yes, it can! */
>> is_dirty = is_write && !(pte & PG_DIRTY_MASK);
>> if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
>> - pte |= PG_ACCESSED_MASK;
>> - if (is_dirty) {
>> - pte |= PG_DIRTY_MASK;
>> + pte = update_entry(cs, pte_addr, pte,
>> + PG_ACCESSED_MASK | (is_dirty ? PG_DIRTY_MASK : 0));
>> + if (!pte) {
>> + goto restart;
>> }
>> - x86_stl_phys_notdirty(cs, pte_addr, pte);
>> }
>>
>> if (!(pte & PG_DIRTY_MASK)) {
>
>
Thread overview: 8+ messages
2018-12-13 23:58 [Qemu-devel] [PATCH 1/3] memory_ldst: Add atomic ops for PTE updates Benjamin Herrenschmidt
2018-12-13 23:58 ` [Qemu-devel] [PATCH 2/3] i386: Atomically update PTEs with mttcg Benjamin Herrenschmidt
2018-12-14 0:05 ` Benjamin Herrenschmidt
2018-12-14 11:11 ` Philippe Mathieu-Daudé [this message]
2018-12-13 23:58 ` [Qemu-devel] [PATCH 3/3] ppc: Fix radix RC updates Benjamin Herrenschmidt
2018-12-14 0:03 ` Benjamin Herrenschmidt
2018-12-14 3:01 ` [Qemu-devel] [PATCH 1/3] memory_ldst: Add atomic ops for PTE updates Richard Henderson
2018-12-14 3:54 ` Benjamin Herrenschmidt