All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Nadav Amit <namit@vmware.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 4.9 23/62] hugetlbfs: flush TLBs correctly after huge_pmd_unshare
Date: Mon,  6 Dec 2021 15:56:06 +0100	[thread overview]
Message-ID: <20211206145549.986494138@linuxfoundation.org> (raw)
In-Reply-To: <20211206145549.155163074@linuxfoundation.org>

From: Nadav Amit <namit@vmware.com>

commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream.

When __unmap_hugepage_range() calls to huge_pmd_unshare() succeed, a TLB
flush is missing.  This TLB flush must be performed before releasing the
i_mmap_rwsem, in order to prevent an unshared PMDs page from being
released and reused before the TLB flush took place.

Arguably, a comprehensive solution would use mmu_gather interface to
batch the TLB flushes and the PMDs page release, however it is not an
easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also call
huge_pmd_unshare() and they cannot use the mmu_gather interface; and (2)
deferring the release of the page reference for the PMDs page until
after i_mmap_rwsem is dropeed can confuse huge_pmd_unshare() into
thinking PMDs are shared when they are not.

Fix __unmap_hugepage_range() by adding the missing TLB flush, and
forcing a flush when unshare is successful.

Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages)" # 3.6
Signed-off-by: Nadav Amit <namit@vmware.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/arm/include/asm/tlb.h  |    8 ++++++++
 arch/ia64/include/asm/tlb.h |   10 ++++++++++
 arch/s390/include/asm/tlb.h |   14 ++++++++++++++
 arch/sh/include/asm/tlb.h   |   10 ++++++++++
 arch/um/include/asm/tlb.h   |   12 ++++++++++++
 include/asm-generic/tlb.h   |    2 ++
 mm/hugetlb.c                |   19 +++++++++++++++++++
 mm/memory.c                 |   16 ++++++++++++++++
 8 files changed, 91 insertions(+)

--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -278,6 +278,14 @@ tlb_remove_pmd_tlb_entry(struct mmu_gath
 	tlb_add_flush(tlb, addr);
 }
 
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+		    unsigned long size)
+{
+	tlb_add_flush(tlb, address);
+	tlb_add_flush(tlb, address + size - PMD_SIZE);
+}
+
 #define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
 #define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
 #define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -272,6 +272,16 @@ __tlb_remove_tlb_entry (struct mmu_gathe
 	tlb->end_addr = address + PAGE_SIZE;
 }
 
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+		    unsigned long size)
+{
+	if (tlb->start_addr > address)
+		tlb->start_addr = address;
+	if (tlb->end_addr < address + size)
+		tlb->end_addr = address + size;
+}
+
 #define tlb_migrate_finish(mm)	platform_tlb_migrate_finish(mm)
 
 #define tlb_start_vma(tlb, vma)			do { } while (0)
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -116,6 +116,20 @@ static inline void tlb_remove_page_size(
 	return tlb_remove_page(tlb, page);
 }
 
+static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
+				unsigned long address, unsigned long size)
+{
+	/*
+	 * the range might exceed the original range that was provided to
+	 * tlb_gather_mmu(), so we need to update it despite the fact it is
+	 * usually not updated.
+	 */
+	if (tlb->start > address)
+		tlb->start = address;
+	if (tlb->end < address + size)
+		tlb->end = address + size;
+}
+
 /*
  * pte_free_tlb frees a pte table and clears the CRSTE for the
  * page table from the tlb.
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -115,6 +115,16 @@ static inline bool __tlb_remove_page_siz
 	return __tlb_remove_page(tlb, page);
 }
 
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+		    unsigned long size)
+{
+	if (tlb->start > address)
+		tlb->start = address;
+	if (tlb->end < address + size)
+		tlb->end = address + size;
+}
+
 static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
 					 struct page *page)
 {
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -128,6 +128,18 @@ static inline void tlb_remove_page_size(
 	return tlb_remove_page(tlb, page);
 }
 
+static inline void
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+		    unsigned long size)
+{
+	tlb->need_flush = 1;
+
+	if (tlb->start > address)
+		tlb->start = address;
+	if (tlb->end < address + size)
+		tlb->end = address + size;
+}
+
 /**
  * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
  *
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -123,6 +123,8 @@ void tlb_finish_mmu(struct mmu_gather *t
 							unsigned long end);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
+void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+			 unsigned long size);
 
 static inline void __tlb_adjust_range(struct mmu_gather *tlb,
 				      unsigned long address)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3395,6 +3395,7 @@ void __unmap_hugepage_range(struct mmu_g
 	unsigned long sz = huge_page_size(h);
 	const unsigned long mmun_start = start;	/* For mmu_notifiers */
 	const unsigned long mmun_end   = end;	/* For mmu_notifiers */
+	bool force_flush = false;
 
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON(start & ~huge_page_mask(h));
@@ -3411,6 +3412,8 @@ void __unmap_hugepage_range(struct mmu_g
 		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, &address, ptep)) {
 			spin_unlock(ptl);
+			tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
+			force_flush = true;
 			continue;
 		}
 
@@ -3467,6 +3470,22 @@ void __unmap_hugepage_range(struct mmu_g
 	}
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	tlb_end_vma(tlb, vma);
+
+	/*
+	 * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
+	 * could defer the flush until now, since by holding i_mmap_rwsem we
+	 * guaranteed that the last refernece would not be dropped. But we must
+	 * do the flushing before we return, as otherwise i_mmap_rwsem will be
+	 * dropped and the last reference to the shared PMDs page might be
+	 * dropped as well.
+	 *
+	 * In theory we could defer the freeing of the PMD pages as well, but
+	 * huge_pmd_unshare() relies on the exact page_count for the PMD page to
+	 * detect sharing, so we cannot defer the release of the page either.
+	 * Instead, do flush now.
+	 */
+	if (force_flush)
+		tlb_flush_mmu(tlb);
 }
 
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -320,6 +320,22 @@ bool __tlb_remove_page_size(struct mmu_g
 	return false;
 }
 
+void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
+			 unsigned long size)
+{
+	if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE)
+		tlb_flush_mmu(tlb);
+
+	tlb->page_size = PMD_SIZE;
+	tlb->start = min(tlb->start, address);
+	tlb->end = max(tlb->end, address + size);
+	/*
+	 * Track the last address with which we adjusted the range. This
+	 * will be used later to adjust again after a mmu_flush due to
+	 * failed __tlb_remove_page
+	 */
+	tlb->addr = address + size - PMD_SIZE;
+}
 #endif /* HAVE_GENERIC_MMU_GATHER */
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE



  parent reply	other threads:[~2021-12-06 15:02 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-06 14:55 [PATCH 4.9 00/62] 4.9.292-rc1 review Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 01/62] staging: ion: Prevent incorrect reference counting behavour Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 02/62] USB: serial: option: add Telit LE910S1 0x9200 composition Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 03/62] USB: serial: option: add Fibocom FM101-GL variants Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 04/62] usb: hub: Fix usb enumeration issue due to address0 race Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 05/62] usb: hub: Fix locking issues with address0_mutex Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 06/62] binder: fix test regression due to sender_euid change Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 07/62] ALSA: ctxfi: Fix out-of-range access Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 08/62] staging: rtl8192e: Fix use after free in _rtl92e_pci_disconnect() Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 09/62] fuse: fix page stealing Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 10/62] xen: dont continue xenstore initialization in case of errors Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 11/62] xen: detect uninitialized xenbus in xenbus_init Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 12/62] tracing: Fix pid filtering when triggers are attached Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 13/62] ARM: dts: BCM5301X: Add interrupt properties to GPIO node Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 14/62] ASoC: topology: Add missing rwsem around snd_ctl_remove() calls Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 15/62] net: ieee802154: handle iftypes as u32 Greg Kroah-Hartman
2021-12-06 14:55 ` [PATCH 4.9 16/62] NFSv42: Dont fail clone() unless the OP_CLONE operation failed Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 17/62] ARM: socfpga: Fix crash with CONFIG_FORTIRY_SOURCE Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 18/62] scsi: mpt3sas: Fix kernel panic during drive powercycle test Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 19/62] drm/vc4: fix error code in vc4_create_object() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 20/62] PM: hibernate: use correct mode for swsusp_close() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 21/62] tcp_cubic: fix spurious Hystart ACK train detections for not-cwnd-limited flows Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 22/62] tracing: Check pid filtering when creating events Greg Kroah-Hartman
2021-12-06 14:56 ` Greg Kroah-Hartman [this message]
2021-12-06 14:56 ` [PATCH 4.9 24/62] vhost/vsock: fix incorrect used length reported to the guest Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 25/62] proc/vmcore: fix clearing user buffer by properly using clear_user() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 26/62] NFC: add NCI_UNREG flag to eliminate the race Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 27/62] fuse: release pipe buf after last use Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 28/62] xen: sync include/xen/interface/io/ring.h with Xens newest version Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 29/62] xen/blkfront: read response from backend only once Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 30/62] xen/blkfront: dont take local copy of a request from the ring page Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 31/62] xen/blkfront: dont trust the backend response data blindly Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 32/62] xen/netfront: read response from backend only once Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 33/62] xen/netfront: dont read data from request on the ring page Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 34/62] xen/netfront: disentangle tx_skb_freelist Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 35/62] xen/netfront: dont trust the backend response data blindly Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 36/62] tty: hvc: replace BUG_ON() with negative return value Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 37/62] shm: extend forced shm destroy to support objects from several IPC nses Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 38/62] NFSv42: Fix pagecache invalidation after COPY/CLONE Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 39/62] hugetlb: take PMD sharing into account when flushing tlb/caches Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 40/62] net: return correct error code Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 41/62] platform/x86: thinkpad_acpi: Fix WWAN device disabled issue after S3 deep Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 42/62] s390/setup: avoid using memblock_enforce_memory_limit Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 43/62] thermal: core: Reset previous low and high trip during thermal zone init Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 44/62] scsi: iscsi: Unblock session then wake up error handler Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 45/62] ethernet: hisilicon: hns: hns_dsaf_misc: fix a possible array overflow in hns_dsaf_ge_srst_by_port() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 46/62] net: tulip: de4x5: fix the problem that the array lp->phy[8] may be out of bound Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 47/62] net: ethernet: dec: tulip: de4x5: fix possible array overflows in type3_infoblock() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 48/62] vrf: Reset IPCB/IP6CB when processing outbound pkts in vrf dev xmit Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 49/62] kprobes: Limit max data_size of the kretprobe instances Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 50/62] sata_fsl: fix UAF in sata_fsl_port_stop when rmmod sata_fsl Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 51/62] sata_fsl: fix warning in remove_proc_entry " Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 52/62] fs: add fget_many() and fput_many() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 53/62] fget: check that the fd still exists after getting a ref to it Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 54/62] natsemi: xtensa: fix section mismatch warnings Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 55/62] net: qlogic: qlcnic: Fix a NULL pointer dereference in qlcnic_83xx_add_rings() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 56/62] siphash: use _unaligned version by default Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 57/62] net/rds: correct socket tunable error in rds_tcp_tune() Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 58/62] parisc: Fix "make install" on newer debian releases Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 59/62] vgacon: Propagate console boot parameters before calling `vc_resize Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 60/62] tty: serial: msm_serial: Deactivate RX DMA for polling support Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 61/62] serial: pl011: Add ACPI SBSA UART match id Greg Kroah-Hartman
2021-12-06 14:56 ` [PATCH 4.9 62/62] serial: core: fix transmit-buffer reset and memleak Greg Kroah-Hartman
2021-12-06 18:26 ` [PATCH 4.9 00/62] 4.9.292-rc1 review Florian Fainelli
2021-12-06 21:57 ` Shuah Khan
2021-12-07  9:36 ` Jon Hunter
2021-12-07 20:40 ` Guenter Roeck
2021-12-08  4:10 ` Naresh Kamboju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211206145549.986494138@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mike.kravetz@oracle.com \
    --cc=namit@vmware.com \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.