All of lore.kernel.org
 help / color / mirror / Atom feed
From: Suren Baghdasaryan <surenb@google.com>
To: akpm@linux-foundation.org
Cc: willy@infradead.org, hannes@cmpxchg.org, mhocko@suse.com,
	josef@toxicpanda.com, jack@suse.cz, ldufour@linux.ibm.com,
	laurent.dufour@fr.ibm.com, michel@lespinasse.org,
	liam.howlett@oracle.com, jglisse@google.com, vbabka@suse.cz,
	minchan@google.com, dave@stgolabs.net,
	punit.agrawal@bytedance.com, lstoakes@gmail.com,
	hdanton@sina.com, apopple@nvidia.com, surenb@google.com,
	linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, kernel-team@android.com
Subject: [PATCH 2/3] mm: drop VMA lock before waiting for migration
Date: Mon,  1 May 2023 10:50:24 -0700	[thread overview]
Message-ID: <20230501175025.36233-2-surenb@google.com> (raw)
In-Reply-To: <20230501175025.36233-1-surenb@google.com>

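migration_entry_wait() does not need the VMA lock, so the lock can be
dropped before waiting. Introduce VM_FAULT_VMA_UNLOCKED to indicate that
the VMA lock was dropped while in handle_mm_fault().
Note that once the VMA lock is dropped, the VMA pointer must not be used,
as there is no guarantee that the VMA has not been freed.
While here, also add the missing VM_FAULT_COMPLETED entry to the fault
result name table, next to the new VM_FAULT_VMA_UNLOCKED entry.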

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 arch/arm64/mm/fault.c    |  3 ++-
 arch/powerpc/mm/fault.c  |  3 ++-
 arch/s390/mm/fault.c     |  3 ++-
 arch/x86/mm/fault.c      |  3 ++-
 include/linux/mm_types.h |  6 +++++-
 mm/memory.c              | 12 ++++++++++--
 6 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9e0db5c387e3..8fa281f49d61 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -602,7 +602,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	}
 	fault = handle_mm_fault(vma, addr & PAGE_MASK,
 				mm_flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 531177a4ee08..b27730f07141 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -494,7 +494,8 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	}
 
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index b65144c392b0..cc923dbb0821 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -418,7 +418,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 		goto lock_mmap;
 	}
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
 		goto out;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4399983c50c..ef62ab2fd211 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1347,7 +1347,8 @@ void do_user_addr_fault(struct pt_regs *regs,
 		goto lock_mmap;
 	}
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 306a3d1a0fa6..b3b57c6da0e1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1030,6 +1030,7 @@ typedef __bitwise unsigned int vm_fault_t;
  *				fsync() to complete (for synchronous page faults
  *				in DAX)
  * @VM_FAULT_COMPLETED:		->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_VMA_UNLOCKED:	VMA lock was released
  * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
  *
  */
@@ -1047,6 +1048,7 @@ enum vm_fault_reason {
 	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
 	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
 	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+	VM_FAULT_VMA_UNLOCKED   = (__force vm_fault_t)0x008000,
 	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
 
@@ -1070,7 +1072,9 @@ enum vm_fault_reason {
 	{ VM_FAULT_RETRY,               "RETRY" },	\
 	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
 	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
-	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
+	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },	\
+	{ VM_FAULT_COMPLETED,           "COMPLETED" },	\
+	{ VM_FAULT_VMA_UNLOCKED,        "VMA_UNLOCKED" }
 
 struct vm_special_mapping {
 	const char *name;	/* The name, e.g. "[vdso]". */
diff --git a/mm/memory.c b/mm/memory.c
index 41f45819a923..8222acf74fd3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3714,8 +3714,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if (unlikely(non_swap_entry(entry))) {
 		if (is_migration_entry(entry)) {
-			migration_entry_wait(vma->vm_mm, vmf->pmd,
-					     vmf->address);
+			/* Save mm in case VMA lock is dropped */
+			struct mm_struct *mm = vma->vm_mm;
+
+			if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+				/* No need to hold VMA lock for migration */
+				vma_end_read(vma);
+				/* CAUTION! VMA can't be used after this */
+				ret |= VM_FAULT_VMA_UNLOCKED;
+			}
+			migration_entry_wait(mm, vmf->pmd, vmf->address);
 		} else if (is_device_exclusive_entry(entry)) {
 			vmf->page = pfn_swap_entry_to_page(entry);
 			ret = remove_device_exclusive_entry(vmf);
-- 
2.40.1.495.gc816e09b53d-goog


  reply	other threads:[~2023-05-01 17:50 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-01 17:50 [PATCH 1/3] mm: handle swap page faults under VMA lock if page is uncontended Suren Baghdasaryan
2023-05-01 17:50 ` Suren Baghdasaryan [this message]
2023-05-02 13:21   ` [PATCH 2/3] mm: drop VMA lock before waiting for migration Alistair Popple
2023-05-02 16:39     ` Suren Baghdasaryan
2023-05-03 13:03       ` Alistair Popple
2023-05-03 19:42         ` Suren Baghdasaryan
2023-05-02 14:28   ` Matthew Wilcox
2023-05-02 16:41     ` Suren Baghdasaryan
2023-05-01 17:50 ` [PATCH 3/3] mm: implement folio wait under VMA lock Suren Baghdasaryan
2023-05-02  2:02 ` [PATCH 1/3] mm: handle swap page faults under VMA lock if page is uncontended Matthew Wilcox
2023-05-02  2:30   ` Suren Baghdasaryan
2023-05-02  3:22     ` Matthew Wilcox
2023-05-02  5:04       ` Suren Baghdasaryan
2023-05-02 15:03         ` Matthew Wilcox
2023-05-02 16:36           ` Suren Baghdasaryan
2023-05-02 22:31             ` Matthew Wilcox
2023-05-02 23:04               ` Suren Baghdasaryan
2023-05-02 23:40                 ` Matthew Wilcox
2023-05-03  1:05                   ` Suren Baghdasaryan
2023-05-03  8:34                 ` Yosry Ahmed
2023-05-03 19:57                   ` Suren Baghdasaryan
2023-05-03 20:57                     ` Yosry Ahmed
2023-05-05  5:02                       ` Huang, Ying
2023-05-05 22:30                         ` Suren Baghdasaryan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230501175025.36233-2-surenb@google.com \
    --to=surenb@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=dave@stgolabs.net \
    --cc=hannes@cmpxchg.org \
    --cc=hdanton@sina.com \
    --cc=jack@suse.cz \
    --cc=jglisse@google.com \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@android.com \
    --cc=laurent.dufour@fr.ibm.com \
    --cc=ldufour@linux.ibm.com \
    --cc=liam.howlett@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lstoakes@gmail.com \
    --cc=mhocko@suse.com \
    --cc=michel@lespinasse.org \
    --cc=minchan@google.com \
    --cc=punit.agrawal@bytedance.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.