From: Suren Baghdasaryan <surenb@google.com>
To: akpm@linux-foundation.org
Cc: willy@infradead.org, david@kernel.org, ziy@nvidia.com,
matthew.brost@intel.com, joshua.hahnjy@gmail.com,
rakie.kim@sk.com, byungchul@sk.com, gourry@gourry.net,
ying.huang@linux.alibaba.com, apopple@nvidia.com,
lorenzo.stoakes@oracle.com, baolin.wang@linux.alibaba.com,
Liam.Howlett@oracle.com, npache@redhat.com,
ryan.roberts@arm.com, dev.jain@arm.com, baohua@kernel.org,
lance.yang@linux.dev, vbabka@suse.cz, jannh@google.com,
rppt@kernel.org, mhocko@suse.com, pfalcato@suse.de,
kees@kernel.org, maddy@linux.ibm.com, npiggin@gmail.com,
mpe@ellerman.id.au, chleroy@kernel.org,
borntraeger@linux.ibm.com, frankja@linux.ibm.com,
imbrenda@linux.ibm.com, hca@linux.ibm.com, gor@linux.ibm.com,
agordeev@linux.ibm.com, svens@linux.ibm.com,
gerald.schaefer@linux.ibm.com, linux-mm@kvack.org,
linuxppc-dev@lists.ozlabs.org, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-s390@vger.kernel.org,
surenb@google.com
Subject: [PATCH v3 3/3] mm: use vma_start_write_killable() in process_vma_walk_lock()
Date: Wed, 25 Feb 2026 23:06:09 -0800 [thread overview]
Message-ID: <20260226070609.3072570-4-surenb@google.com> (raw)
In-Reply-To: <20260226070609.3072570-1-surenb@google.com>
Replace vma_start_write() with vma_start_write_killable() when
process_vma_walk_lock() is used with the PGWALK_WRLOCK option.
Adjust its direct and indirect users to check for a possible error
and handle it, ensuring that -EINTR is propagated correctly and
never silently ignored.
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
arch/s390/kvm/kvm-s390.c | 2 +-
fs/proc/task_mmu.c | 5 ++++-
mm/mempolicy.c | 14 +++++++++++---
mm/pagewalk.c | 20 ++++++++++++++------
mm/vma.c | 22 ++++++++++++++--------
mm/vma.h | 6 ++++++
6 files changed, 50 insertions(+), 19 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7a175d86cef0..337e4f7db63a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2948,7 +2948,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
}
/* must be called without kvm->lock */
r = kvm_s390_handle_pv(kvm, &args);
- if (copy_to_user(argp, &args, sizeof(args))) {
+ if (r != -EINTR && copy_to_user(argp, &args, sizeof(args))) {
r = -EFAULT;
break;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e091931d7ca1..1238a2988eb6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1797,6 +1797,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
struct clear_refs_private cp = {
.type = type,
};
+ int err;
if (mmap_write_lock_killable(mm)) {
count = -EINTR;
@@ -1824,7 +1825,9 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
0, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
+ err = walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
+ if (err < 0)
+ count = err;
if (type == CLEAR_REFS_SOFT_DIRTY) {
mmu_notifier_invalidate_range_end(&range);
flush_tlb_mm(mm);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 90939f5bde02..3c8b3dfc9c56 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -988,6 +988,8 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
err = walk_page_range(mm, start, end, ops, &qp);
+ if (err == -EINTR)
+ return err;
if (!qp.first)
/* whole range in hole */
@@ -1309,9 +1311,14 @@ static long migrate_to_node(struct mm_struct *mm, int source, int dest,
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
mmap_read_unlock(mm);
+ if (nr_failed == -EINTR)
+ err = nr_failed;
+
if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, alloc_migration_target, NULL,
- (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
+ if (!err)
+ err = migrate_pages(&pagelist, alloc_migration_target,
+ NULL, (unsigned long)&mtc,
+ MIGRATE_SYNC, MR_SYSCALL, NULL);
if (err)
putback_movable_pages(&pagelist);
}
@@ -1611,7 +1618,8 @@ static long do_mbind(unsigned long start, unsigned long len,
MR_MEMPOLICY_MBIND, NULL);
}
- if (nr_failed && (flags & MPOL_MF_STRICT))
+ /* Do not mask EINTR */
+ if ((err != -EINTR) && (nr_failed && (flags & MPOL_MF_STRICT)))
err = -EIO;
if (!list_empty(&pagelist))
putback_movable_pages(&pagelist);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index a94c401ab2cf..dc9f7a7709c6 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -425,14 +425,13 @@ static inline void process_mm_walk_lock(struct mm_struct *mm,
mmap_assert_write_locked(mm);
}
-static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+static inline int process_vma_walk_lock(struct vm_area_struct *vma,
enum page_walk_lock walk_lock)
{
#ifdef CONFIG_PER_VMA_LOCK
switch (walk_lock) {
case PGWALK_WRLOCK:
- vma_start_write(vma);
- break;
+ return vma_start_write_killable(vma);
case PGWALK_WRLOCK_VERIFY:
vma_assert_write_locked(vma);
break;
@@ -444,6 +443,7 @@ static inline void process_vma_walk_lock(struct vm_area_struct *vma,
break;
}
#endif
+ return 0;
}
/*
@@ -487,7 +487,9 @@ int walk_page_range_mm_unsafe(struct mm_struct *mm, unsigned long start,
if (ops->pte_hole)
err = ops->pte_hole(start, next, -1, &walk);
} else { /* inside vma */
- process_vma_walk_lock(vma, ops->walk_lock);
+ err = process_vma_walk_lock(vma, ops->walk_lock);
+ if (err)
+ break;
walk.vma = vma;
next = min(end, vma->vm_end);
vma = find_vma(mm, vma->vm_end);
@@ -704,6 +706,7 @@ int walk_page_range_vma_unsafe(struct vm_area_struct *vma, unsigned long start,
.vma = vma,
.private = private,
};
+ int err;
if (start >= end || !walk.mm)
return -EINVAL;
@@ -711,7 +714,9 @@ int walk_page_range_vma_unsafe(struct vm_area_struct *vma, unsigned long start,
return -EINVAL;
process_mm_walk_lock(walk.mm, ops->walk_lock);
- process_vma_walk_lock(vma, ops->walk_lock);
+ err = process_vma_walk_lock(vma, ops->walk_lock);
+ if (err)
+ return err;
return __walk_page_range(start, end, &walk);
}
@@ -734,6 +739,7 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
.vma = vma,
.private = private,
};
+ int err;
if (!walk.mm)
return -EINVAL;
@@ -741,7 +747,9 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
return -EINVAL;
process_mm_walk_lock(walk.mm, ops->walk_lock);
- process_vma_walk_lock(vma, ops->walk_lock);
+ err = process_vma_walk_lock(vma, ops->walk_lock);
+ if (err)
+ return err;
return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}
diff --git a/mm/vma.c b/mm/vma.c
index 9f2664f1d078..46bbad6e64a4 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -998,14 +998,18 @@ static __must_check struct vm_area_struct *vma_merge_existing_range(
if (anon_dup)
unlink_anon_vmas(anon_dup);
- /*
- * This means we have failed to clone anon_vma's correctly, but no
- * actual changes to VMAs have occurred, so no harm no foul - if the
- * user doesn't want this reported and instead just wants to give up on
- * the merge, allow it.
- */
- if (!vmg->give_up_on_oom)
- vmg->state = VMA_MERGE_ERROR_NOMEM;
+ if (err == -EINTR) {
+ vmg->state = VMA_MERGE_ERROR_INTR;
+ } else {
+ /*
+ * This means we have failed to clone anon_vma's correctly,
+ * but no actual changes to VMAs have occurred, so no harm no
+ * foul - if the user doesn't want this reported and instead
+ * just wants to give up on the merge, allow it.
+ */
+ if (!vmg->give_up_on_oom)
+ vmg->state = VMA_MERGE_ERROR_NOMEM;
+ }
return NULL;
}
@@ -1681,6 +1685,8 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg)
merged = vma_merge_existing_range(vmg);
if (merged)
return merged;
+ if (vmg_intr(vmg))
+ return ERR_PTR(-EINTR);
if (vmg_nomem(vmg))
return ERR_PTR(-ENOMEM);
diff --git a/mm/vma.h b/mm/vma.h
index eba388c61ef4..fe4560f81f4f 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -56,6 +56,7 @@ struct vma_munmap_struct {
enum vma_merge_state {
VMA_MERGE_START,
VMA_MERGE_ERROR_NOMEM,
+ VMA_MERGE_ERROR_INTR,
VMA_MERGE_NOMERGE,
VMA_MERGE_SUCCESS,
};
@@ -226,6 +227,11 @@ static inline bool vmg_nomem(struct vma_merge_struct *vmg)
return vmg->state == VMA_MERGE_ERROR_NOMEM;
}
+static inline bool vmg_intr(struct vma_merge_struct *vmg)
+{
+ return vmg->state == VMA_MERGE_ERROR_INTR;
+}
+
/* Assumes addr >= vma->vm_start. */
static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
unsigned long addr)
--
2.53.0.414.gf7e9f6c205-goog
next prev parent reply other threads:[~2026-02-26 7:06 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-26 7:06 [PATCH v3 0/3] Use killable vma write locking in most places Suren Baghdasaryan
2026-02-26 7:06 ` [PATCH v3 1/3] mm/vma: cleanup error handling path in vma_expand() Suren Baghdasaryan
2026-02-26 16:42 ` Liam R. Howlett
2026-02-26 17:23 ` Suren Baghdasaryan
2026-03-02 13:57 ` Lorenzo Stoakes
2026-03-03 21:08 ` Suren Baghdasaryan
2026-02-26 7:06 ` [PATCH v3 2/3] mm: replace vma_start_write() with vma_start_write_killable() Suren Baghdasaryan
2026-02-26 17:43 ` Liam R. Howlett
2026-02-26 21:44 ` Suren Baghdasaryan
2026-03-02 14:52 ` Lorenzo Stoakes
2026-03-03 22:11 ` Suren Baghdasaryan
2026-03-03 22:18 ` Matthew Wilcox
2026-03-04 0:02 ` Suren Baghdasaryan
2026-03-04 3:24 ` Matthew Wilcox
2026-03-04 16:53 ` Lorenzo Stoakes (Oracle)
2026-03-04 19:00 ` Matthew Wilcox
2026-03-05 15:55 ` Suren Baghdasaryan
2026-03-04 16:51 ` Lorenzo Stoakes (Oracle)
2026-03-05 15:54 ` Suren Baghdasaryan
2026-02-26 7:06 ` Suren Baghdasaryan [this message]
2026-02-26 18:10 ` [PATCH v3 3/3] mm: use vma_start_write_killable() in process_vma_walk_lock() Claudio Imbrenda
2026-02-26 18:24 ` Suren Baghdasaryan
2026-02-27 8:57 ` Claudio Imbrenda
2026-03-02 15:19 ` Lorenzo Stoakes
2026-03-03 23:59 ` Suren Baghdasaryan
2026-03-04 16:58 ` Lorenzo Stoakes (Oracle)
2026-03-05 16:12 ` Suren Baghdasaryan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260226070609.3072570-4-surenb@google.com \
--to=surenb@google.com \
--cc=Liam.Howlett@oracle.com \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=baohua@kernel.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=borntraeger@linux.ibm.com \
--cc=byungchul@sk.com \
--cc=chleroy@kernel.org \
--cc=david@kernel.org \
--cc=dev.jain@arm.com \
--cc=frankja@linux.ibm.com \
--cc=gerald.schaefer@linux.ibm.com \
--cc=gor@linux.ibm.com \
--cc=gourry@gourry.net \
--cc=hca@linux.ibm.com \
--cc=imbrenda@linux.ibm.com \
--cc=jannh@google.com \
--cc=joshua.hahnjy@gmail.com \
--cc=kees@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-s390@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=maddy@linux.ibm.com \
--cc=matthew.brost@intel.com \
--cc=mhocko@suse.com \
--cc=mpe@ellerman.id.au \
--cc=npache@redhat.com \
--cc=npiggin@gmail.com \
--cc=pfalcato@suse.de \
--cc=rakie.kim@sk.com \
--cc=rppt@kernel.org \
--cc=ryan.roberts@arm.com \
--cc=svens@linux.ibm.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.