From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Nadav Amit <namit@vmware.com>,
Mike Kravetz <mike.kravetz@oracle.com>,
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
Andrew Morton <akpm@linux-foundation.org>,
Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 4.4 19/52] hugetlbfs: flush TLBs correctly after huge_pmd_unshare
Date: Mon, 6 Dec 2021 15:56:03 +0100 [thread overview]
Message-ID: <20211206145548.546472716@linuxfoundation.org> (raw)
In-Reply-To: <20211206145547.892668902@linuxfoundation.org>
From: Nadav Amit <namit@vmware.com>
commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream.
When a call to huge_pmd_unshare() from __unmap_hugepage_range() succeeds, a
TLB flush is missing. This TLB flush must be performed before releasing the
i_mmap_rwsem, in order to prevent an unshared PMDs page from being
released and reused before the TLB flush takes place.
Arguably, a comprehensive solution would use the mmu_gather interface to
batch the TLB flushes and the PMDs page release; however, it is not an
easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also call
huge_pmd_unshare() and they cannot use the mmu_gather interface; and (2)
deferring the release of the page reference for the PMDs page until
after i_mmap_rwsem is dropped can confuse huge_pmd_unshare() into
thinking PMDs are shared when they are not.
Fix __unmap_hugepage_range() by adding the missing TLB flush, and
forcing a flush when unshare is successful.
Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages") # 3.6
Signed-off-by: Nadav Amit <namit@vmware.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/arm/include/asm/tlb.h | 8 ++++++++
arch/ia64/include/asm/tlb.h | 10 ++++++++++
arch/s390/include/asm/tlb.h | 13 +++++++++++++
arch/sh/include/asm/tlb.h | 10 ++++++++++
arch/um/include/asm/tlb.h | 12 ++++++++++++
include/asm-generic/tlb.h | 7 +++++++
mm/hugetlb.c | 5 ++++-
7 files changed, 64 insertions(+), 1 deletion(-)
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -257,6 +257,14 @@ tlb_remove_pmd_tlb_entry(struct mmu_gath
tlb_add_flush(tlb, addr);
}
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+ unsigned long size)
+{
+ tlb_add_flush(tlb, address);
+ tlb_add_flush(tlb, address + size - PMD_SIZE);
+}
+
#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -251,6 +251,16 @@ __tlb_remove_tlb_entry (struct mmu_gathe
tlb->end_addr = address + PAGE_SIZE;
}
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+ unsigned long size)
+{
+ if (tlb->start_addr > address)
+ tlb->start_addr = address;
+ if (tlb->end_addr < address + size)
+ tlb->end_addr = address + size;
+}
+
#define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm)
#define tlb_start_vma(tlb, vma) do { } while (0)
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -97,6 +97,19 @@ static inline void tlb_remove_page(struc
{
free_page_and_swap_cache(page);
}
+static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
+ unsigned long address, unsigned long size)
+{
+ /*
+ * the range might exceed the original range that was provided to
+ * tlb_gather_mmu(), so we need to update it despite the fact it is
+ * usually not updated.
+ */
+ if (tlb->start > address)
+ tlb->start = address;
+ if (tlb->end < address + size)
+ tlb->end = address + size;
+}
/*
* pte_free_tlb frees a pte table and clears the CRSTE for the
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -65,6 +65,16 @@ tlb_remove_tlb_entry(struct mmu_gather *
tlb->end = address + PAGE_SIZE;
}
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+ unsigned long size)
+{
+ if (tlb->start > address)
+ tlb->start = address;
+ if (tlb->end < address + size)
+ tlb->end = address + size;
+}
+
/*
* In the case of tlb vma handling, we can optimise these away in the
* case where we're doing a full MM flush. When we're doing a munmap,
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -110,6 +110,18 @@ static inline void tlb_remove_page(struc
__tlb_remove_page(tlb, page);
}
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+ unsigned long size)
+{
+ tlb->need_flush = 1;
+
+ if (tlb->start > address)
+ tlb->start = address;
+ if (tlb->end < address + size)
+ tlb->end = address + size;
+}
+
/**
* tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
*
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -165,6 +165,13 @@ static inline void __tlb_reset_range(str
#define tlb_end_vma __tlb_end_vma
#endif
+static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
+ unsigned long address, unsigned long size)
+{
+ tlb->start = min(tlb->start, address);
+ tlb->end = max(tlb->end, address + size);
+}
+
#ifndef __tlb_remove_tlb_entry
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#endif
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3290,8 +3290,11 @@ again:
continue;
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, &address, ptep))
+ if (huge_pmd_unshare(mm, &address, ptep)) {
+ tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
+ force_flush = 1;
goto unlock;
+ }
pte = huge_ptep_get(ptep);
if (huge_pte_none(pte))
next prev parent reply other threads:[~2021-12-06 15:02 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-06 14:55 [PATCH 4.4 00/52] 4.4.294-rc1 review Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 01/52] staging: ion: Prevent incorrect reference counting behavour Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 02/52] USB: serial: option: add Telit LE910S1 0x9200 composition Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 03/52] USB: serial: option: add Fibocom FM101-GL variants Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 04/52] usb: hub: Fix usb enumeration issue due to address0 race Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 05/52] usb: hub: Fix locking issues with address0_mutex Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 06/52] binder: fix test regression due to sender_euid change Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 07/52] ALSA: ctxfi: Fix out-of-range access Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 08/52] staging: rtl8192e: Fix use after free in _rtl92e_pci_disconnect() Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 09/52] xen: dont continue xenstore initialization in case of errors Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 10/52] xen: detect uninitialized xenbus in xenbus_init Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 11/52] ARM: dts: BCM5301X: Add interrupt properties to GPIO node Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 12/52] ASoC: topology: Add missing rwsem around snd_ctl_remove() calls Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 13/52] net: ieee802154: handle iftypes as u32 Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 14/52] NFSv42: Dont fail clone() unless the OP_CLONE operation failed Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.4 15/52] ARM: socfpga: Fix crash with CONFIG_FORTIRY_SOURCE Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 16/52] scsi: mpt3sas: Fix kernel panic during drive powercycle test Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 17/52] tcp_cubic: fix spurious Hystart ACK train detections for not-cwnd-limited flows Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 18/52] tracing: Check pid filtering when creating events Greg Kroah-Hartman
2021-12-06 14:56 ` Greg Kroah-Hartman [this message]
2021-12-06 14:56 ` [PATCH 4.4 20/52] proc/vmcore: fix clearing user buffer by properly using clear_user() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 21/52] NFC: add NCI_UNREG flag to eliminate the race Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 22/52] fuse: fix page stealing Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 23/52] fuse: release pipe buf after last use Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 24/52] shm: extend forced shm destroy to support objects from several IPC nses Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 25/52] xen: sync include/xen/interface/io/ring.h with Xens newest version Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 26/52] xen/blkfront: read response from backend only once Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 27/52] xen/blkfront: dont take local copy of a request from the ring page Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 28/52] xen/blkfront: dont trust the backend response data blindly Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 29/52] xen/netfront: read response from backend only once Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 30/52] xen/netfront: dont read data from request on the ring page Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 31/52] xen/netfront: disentangle tx_skb_freelist Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 32/52] xen/netfront: dont trust the backend response data blindly Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 33/52] tty: hvc: replace BUG_ON() with negative return value Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 34/52] hugetlb: take PMD sharing into account when flushing tlb/caches Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 35/52] net: return correct error code Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 36/52] platform/x86: thinkpad_acpi: Fix WWAN device disabled issue after S3 deep Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 37/52] s390/setup: avoid using memblock_enforce_memory_limit Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 38/52] scsi: iscsi: Unblock session then wake up error handler Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 39/52] net: tulip: de4x5: fix the problem that the array lp->phy[8] may be out of bound Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 40/52] net: ethernet: dec: tulip: de4x5: fix possible array overflows in type3_infoblock() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 41/52] kprobes: Limit max data_size of the kretprobe instances Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 42/52] sata_fsl: fix UAF in sata_fsl_port_stop when rmmod sata_fsl Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 43/52] sata_fsl: fix warning in remove_proc_entry " Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 44/52] fs: add fget_many() and fput_many() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 45/52] fget: check that the fd still exists after getting a ref to it Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 46/52] natsemi: xtensa: fix section mismatch warnings Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 47/52] net: qlogic: qlcnic: Fix a NULL pointer dereference in qlcnic_83xx_add_rings() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 48/52] siphash: use _unaligned version by default Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 49/52] parisc: Fix "make install" on newer debian releases Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 50/52] vgacon: Propagate console boot parameters before calling `vc_resize Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 51/52] tty: serial: msm_serial: Deactivate RX DMA for polling support Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.4 52/52] serial: pl011: Add ACPI SBSA UART match id Greg Kroah-Hartman
2021-12-06 19:34 ` [PATCH 4.4 00/52] 4.4.294-rc1 review Pavel Machek
2021-12-06 21:58 ` Shuah Khan
2021-12-07 9:36 ` Jon Hunter
2021-12-07 20:39 ` Guenter Roeck
2021-12-08 4:15 ` Naresh Kamboju
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211206145548.546472716@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=aneesh.kumar@linux.vnet.ibm.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mike.kravetz@oracle.com \
--cc=namit@vmware.com \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.