From: Suren Baghdasaryan <surenb@google.com>
To: akpm@linux-foundation.org
Cc: liam@infradead.org, ljs@kernel.org, vbabka@kernel.org,
david@redhat.com, willy@infradead.org, jannh@google.com,
paulmck@kernel.org, pfalcato@suse.de, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
surenb@google.com
Subject: [PATCH 2/2] fs/proc/task_mmu: read proc/pid/smaps_rollup under per-vma lock
Date: Fri, 5 Jun 2026 18:57:29 -0700 [thread overview]
Message-ID: <20260606015729.1837935-2-surenb@google.com> (raw)
In-Reply-To: <20260606015729.1837935-1-surenb@google.com>
proc/pid/smaps_rollup can be read using the combination of RCU and
VMA read locks, similar to proc/pid/{maps|smaps|numa_maps}. RCU is
required to safely traverse the VMA tree and VMA lock stabilizes the
VMA being processed and the pagetable walk.
Note that we have to keep the logic to drop mmap_lock on contention
because even when using per-VMA locks we might have to fall back to
holding the mmap_lock.
Running Paul's contention benchmark [1] shows considerable improvement
both in median and in the worst case latencies:
Execution command: run-proc-vs-map.sh --nsamples 20 --rawdata -- \
--busyduration 2 --procfile smaps_rollup
Baseline:
0.174 0.161 2.553
0.174 0.164 2.663
0.174 0.165 2.664
0.174 0.166 2.679
0.174 0.167 2.691
0.174 0.168 2.704
0.174 0.169 2.729
0.174 0.172 2.741
0.174 0.174 2.745
0.174 0.174 2.755
0.174 0.175 2.790
0.174 0.177 2.809
0.174 0.179 3.096
0.174 0.183 3.144
0.174 0.184 3.158
0.174 0.185 3.175
0.174 0.185 4.568
0.174 0.198 4.821
0.174 0.214 5.143
0.174 0.251 5.220
Patched:
0.007 0.007 1.952
0.007 0.007 1.955
0.007 0.007 1.955
0.007 0.007 1.955
0.007 0.007 1.957
0.007 0.007 1.969
0.007 0.007 2.065
0.007 0.007 2.075
0.007 0.007 2.146
0.007 0.007 2.195
0.007 0.007 2.223
0.007 0.007 2.259
0.007 0.007 2.488
0.007 0.007 2.562
0.007 0.007 2.599
0.007 0.007 2.697
0.007 0.007 3.030
0.007 0.007 3.075
0.007 0.007 3.145
0.007 0.007 3.225
[1] https://github.com/paulmckrcu/proc-mmap_sem-test
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
fs/proc/task_mmu.c | 134 ++++++++++++++++++++-------------------------
1 file changed, 59 insertions(+), 75 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 023422fcee12..c2bd9f5bbbcd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -233,6 +233,16 @@ static inline void reacquire_rcu(struct proc_maps_private *priv)
vma_iter_set(&priv->iter, priv->lock_ctx.locked_vma->vm_end);
}
+static inline bool is_mmap_lock_contended(struct proc_maps_private *priv)
+{
+ struct proc_maps_locking_ctx *lock_ctx = &priv->lock_ctx;
+
+ if (!lock_ctx->mmap_locked)
+ return false;
+
+ return !!mmap_lock_is_contended(lock_ctx->mm);
+}
+
#else /* CONFIG_PER_VMA_LOCK */
static inline int lock_ctx_mm(struct proc_maps_locking_ctx *lock_ctx)
@@ -268,6 +278,11 @@ static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
return false;
}
+static inline bool is_mmap_lock_contended(struct proc_maps_private *priv)
+{
+ return !!mmap_lock_is_contended(priv->lock_ctx.mm);
+}
+
static inline void drop_rcu(struct proc_maps_private *priv) {}
static inline void reacquire_rcu(struct proc_maps_private *priv) {}
@@ -1486,12 +1501,15 @@ static int show_smap(struct seq_file *m, void *v)
static int show_smaps_rollup(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
+ struct proc_maps_locking_ctx *lock_ctx = &priv->lock_ctx;
+ struct mm_struct *mm = lock_ctx->mm;
struct mem_size_stats mss = {};
- struct mm_struct *mm = priv->lock_ctx.mm;
struct vm_area_struct *vma;
- unsigned long vma_start = 0, last_vma_end = 0;
+ unsigned long vma_start = 0;
+ unsigned long last_vma_end = 0;
+ loff_t pos = 0;
int ret = 0;
- VMA_ITERATOR(vmi, mm, 0);
+
priv->task = get_proc_task(priv->inode);
if (!priv->task)
@@ -1502,89 +1520,55 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
goto out_put_task;
}
- ret = lock_ctx_mm(&priv->lock_ctx);
+ hold_task_mempolicy(priv);
+ ret = lock_vma_range(m, lock_ctx);
if (ret)
goto out_put_mm;
- hold_task_mempolicy(priv);
- vma = vma_next(&vmi);
-
- if (unlikely(!vma))
+ vma_iter_init(&priv->iter, mm, 0);
+ vma = proc_get_vma(m, &pos);
+ if (unlikely(!vma) || vma == get_gate_vma(priv->lock_ctx.mm))
goto empty_set;
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto out_unlock;
+ }
+
vma_start = vma->vm_start;
- do {
- smap_gather_stats(priv, vma, &mss, 0);
+ while (vma) {
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto out_unlock;
+ }
+
+ if (vma == get_gate_vma(priv->lock_ctx.mm))
+ break;
+
+ /*
+ * If after retaking mmap_lock, already reported VMA grew or
+ * merged with the next one, then iterate from last_vma_end.
+ */
+ smap_gather_stats(priv, vma, &mss,
+ vma->vm_start < last_vma_end ? last_vma_end : 0);
last_vma_end = vma->vm_end;
/*
* Release mmap_lock temporarily if someone wants to
- * access it for write request.
+ * take it for write request.
*/
- if (mmap_lock_is_contended(mm)) {
- vma_iter_invalidate(&vmi);
- unlock_ctx_mm(&priv->lock_ctx);
- ret = lock_ctx_mm(&priv->lock_ctx);
- if (ret) {
- release_task_mempolicy(priv);
+ if (is_mmap_lock_contended(priv)) {
+ unlock_vma_range(&priv->lock_ctx);
+ ret = lock_vma_range(m, lock_ctx);
+ if (ret)
goto out_put_mm;
- }
-
- /*
- * After dropping the lock, there are four cases to
- * consider. See the following example for explanation.
- *
- * +------+------+-----------+
- * | VMA1 | VMA2 | VMA3 |
- * +------+------+-----------+
- * | | | |
- * 4k 8k 16k 400k
- *
- * Suppose we drop the lock after reading VMA2 due to
- * contention, then we get:
- *
- * last_vma_end = 16k
- *
- * 1) VMA2 is freed, but VMA3 exists:
- *
- * vma_next(vmi) will return VMA3.
- * In this case, just continue from VMA3.
- *
- * 2) VMA2 still exists:
- *
- * vma_next(vmi) will return VMA3.
- * In this case, just continue from VMA3.
- *
- * 3) No more VMAs can be found:
- *
- * vma_next(vmi) will return NULL.
- * No more things to do, just break.
- *
- * 4) (last_vma_end - 1) is the middle of a vma (VMA'):
- *
- * vma_next(vmi) will return VMA' whose range
- * contains last_vma_end.
- * Iterate VMA' from last_vma_end.
- */
- vma = vma_next(&vmi);
- /* Case 3 above */
- if (!vma)
- break;
-
- /* Case 1 and 2 above */
- if (vma->vm_start >= last_vma_end) {
- smap_gather_stats(priv, vma, &mss, 0);
- last_vma_end = vma->vm_end;
- continue;
- }
- /* Case 4 above */
- if (vma->vm_end > last_vma_end) {
- smap_gather_stats(priv, vma, &mss, last_vma_end);
- last_vma_end = vma->vm_end;
- }
+ /* Resume from the last position. */
+ pos = last_vma_end;
+ vma_iter_init(&priv->iter, mm, pos);
}
- } for_each_vma(vmi, vma);
+ vma = proc_get_vma(m, &pos);
+ }
empty_set:
show_vma_header_prefix(m, vma_start, last_vma_end, 0, 0, 0, 0);
@@ -1593,10 +1577,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
__show_smap(m, &mss, true);
- release_task_mempolicy(priv);
- unlock_ctx_mm(&priv->lock_ctx);
-
+out_unlock:
+ unlock_vma_range(&priv->lock_ctx);
out_put_mm:
+ release_task_mempolicy(priv);
mmput(mm);
out_put_task:
put_task_struct(priv->task);
--
2.54.0.1032.g2f8565e1d1-goog
next prev parent reply other threads:[~2026-06-06 1:57 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-06 1:57 [PATCH 1/2] fs/proc/task_mmu: change lock_vma_range() to return error code Suren Baghdasaryan
2026-06-06 1:57 ` Suren Baghdasaryan [this message]
2026-06-06 2:00 ` [PATCH 2/2] fs/proc/task_mmu: read proc/pid/smaps_rollup under per-vma lock Suren Baghdasaryan
2026-06-06 8:12 ` Lorenzo Stoakes
2026-06-07 19:45 ` Suren Baghdasaryan
2026-06-08 8:14 ` Lorenzo Stoakes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260606015729.1837935-2-surenb@google.com \
--to=surenb@google.com \
--cc=akpm@linux-foundation.org \
--cc=david@redhat.com \
--cc=jannh@google.com \
--cc=liam@infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=paulmck@kernel.org \
--cc=pfalcato@suse.de \
--cc=vbabka@kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox