All of lore.kernel.org
 help / color / mirror / Atom feed
From: Suren Baghdasaryan <surenb@google.com>
To: akpm@linux-foundation.org
Cc: willy@infradead.org, liam.howlett@oracle.com,
	lorenzo.stoakes@oracle.com,  mhocko@suse.com, vbabka@suse.cz,
	hannes@cmpxchg.org, mjguzik@gmail.com,  oliver.sang@intel.com,
	mgorman@techsingularity.net, david@redhat.com,
	 peterx@redhat.com, oleg@redhat.com, dave@stgolabs.net,
	paulmck@kernel.org,  brauner@kernel.org, dhowells@redhat.com,
	hdanton@sina.com, hughd@google.com,  minchan@google.com,
	jannh@google.com, shakeel.butt@linux.dev,
	 souravpanda@google.com, pasha.tatashin@soleen.com,
	linux-mm@kvack.org,  linux-kernel@vger.kernel.org,
	kernel-team@android.com, surenb@google.com
Subject: [PATCH v2 4/5] mm: make vma cache SLAB_TYPESAFE_BY_RCU
Date: Tue, 12 Nov 2024 11:46:34 -0800	[thread overview]
Message-ID: <20241112194635.444146-5-surenb@google.com> (raw)
In-Reply-To: <20241112194635.444146-1-surenb@google.com>

To enable SLAB_TYPESAFE_BY_RCU for vma cache we need to ensure that
object reuse before RCU grace period is over will be detected inside
lock_vma_under_rcu().
lock_vma_under_rcu() enters RCU read section, finds the vma at the
given address, locks the vma and checks if it got detached or remapped
to cover a different address range. These last checks are there
to ensure that the vma was not modified after we found it but before
locking it. Vma reuse introduces a possibility that in between those
events of finding and locking the vma, it can get detached, reused,
added into a tree and be marked as attached. Current checks will help
detect cases when:
- vma was reused but not yet added into the tree (detached check)
- vma was reused at a different address range (address check)
If vma is covering a new address range which still includes the address
we were looking for, it's not a problem unless the reused vma was added
into a different address space. Therefore checking that vma->vm_mm is
still the same is the only missing check to detect vma reuse.
Add this missing check into lock_vma_under_rcu() and change vma cache
to include SLAB_TYPESAFE_BY_RCU. This will facilitate vm_area_struct
reuse and will minimize the number of call_rcu() calls.
Adding vm_freeptr into vm_area_struct avoids bloating that structure.
lock_vma_under_rcu()'s check of the detached flag guarantees that vma
is valid and attached to a tree, therefore unioning vm_freeptr with
vm_start/vm_end is not an issue even though lock_vma_under_rcu() is
using them.
As part of this change freeptr_t declaration is moved into mm_types.h
to avoid circular dependencies between mm_types.h and slab.h.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/mm_types.h | 10 +++++++---
 include/linux/slab.h     |  6 ------
 kernel/fork.c            | 29 +++++++++++++----------------
 mm/memory.c              |  2 +-
 4 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5c4bfdcfac72..37580cc7bec0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -32,6 +32,12 @@
 struct address_space;
 struct mem_cgroup;
 
+/*
+ * freeptr_t represents a SLUB freelist pointer, which might be encoded
+ * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
+ */
+typedef struct { unsigned long v; } freeptr_t;
+
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -673,9 +679,7 @@ struct vm_area_struct {
 			unsigned long vm_start;
 			unsigned long vm_end;
 		};
-#ifdef CONFIG_PER_VMA_LOCK
-		struct rcu_head vm_rcu;	/* Used for deferred freeing. */
-#endif
+		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
 	};
 
 	/*
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b35e2db7eb0e..cb45db2402ac 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -212,12 +212,6 @@ enum _slab_flag_bits {
 #define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
 #endif
 
-/*
- * freeptr_t represents a SLUB freelist pointer, which might be encoded
- * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
- */
-typedef struct { unsigned long v; } freeptr_t;
-
 /*
  * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
  *
diff --git a/kernel/fork.c b/kernel/fork.c
index 7823797e31d2..946c3f9a9342 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -478,25 +478,15 @@ void __vm_area_free(struct vm_area_struct *vma)
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
-#ifdef CONFIG_PER_VMA_LOCK
-static void vm_area_free_rcu_cb(struct rcu_head *head)
+void vm_area_free(struct vm_area_struct *vma)
 {
-	struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
-						  vm_rcu);
-
+#ifdef CONFIG_PER_VMA_LOCK
+	/* The vma should be detached while being destroyed. */
+	VM_BUG_ON_VMA(!is_vma_detached(vma), vma);
 	/* The vma should not be locked while being destroyed. */
 	VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock.lock), vma);
-	__vm_area_free(vma);
-}
 #endif
-
-void vm_area_free(struct vm_area_struct *vma)
-{
-#ifdef CONFIG_PER_VMA_LOCK
-	call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb);
-#else
 	__vm_area_free(vma);
-#endif
 }
 
 static void account_kernel_stack(struct task_struct *tsk, int account)
@@ -3115,6 +3105,11 @@ void __init mm_cache_init(void)
 
 void __init proc_caches_init(void)
 {
+	struct kmem_cache_args args = {
+		.use_freeptr_offset = true,
+		.freeptr_offset = offsetof(struct vm_area_struct, vm_freeptr),
+	};
+
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3131,9 +3126,11 @@ void __init proc_caches_init(void)
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			NULL);
-	vm_area_cachep = KMEM_CACHE(vm_area_struct,
-			SLAB_HWCACHE_ALIGN|SLAB_NO_MERGE|SLAB_PANIC|
+	vm_area_cachep = kmem_cache_create("vm_area_struct",
+			sizeof(struct vm_area_struct), &args,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
 			SLAB_ACCOUNT);
+
 	mmap_init();
 	nsproxy_cache_init();
 }
diff --git a/mm/memory.c b/mm/memory.c
index d0197a0c0996..9c414c81f14a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6279,7 +6279,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 		goto inval;
 
 	/* Check if the VMA got isolated after we found it */
-	if (is_vma_detached(vma)) {
+	if (is_vma_detached(vma) || vma->vm_mm != mm) {
 		vma_end_read(vma);
 		count_vm_vma_lock_event(VMA_LOCK_MISS);
 		/* The area was replaced with another one */
-- 
2.47.0.277.g8800431eea-goog



  parent reply	other threads:[~2024-11-12 19:47 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-12 19:46 [PATCH v2 0/5] move per-vma lock into vm_area_struct Suren Baghdasaryan
2024-11-12 19:46 ` [PATCH v2 1/5] mm: introduce vma_start_read_locked{_nested} helpers Suren Baghdasaryan
2024-11-13 14:10   ` Lorenzo Stoakes
2024-11-13 15:30     ` Suren Baghdasaryan
2024-11-12 19:46 ` [PATCH v2 2/5] mm: move per-vma lock into vm_area_struct Suren Baghdasaryan
2024-11-13 14:28   ` Lorenzo Stoakes
2024-11-13 14:45     ` Vlastimil Babka
2024-11-13 14:58       ` Lorenzo Stoakes
2024-11-13 15:09         ` Vlastimil Babka
2024-11-13 14:53     ` Mateusz Guzik
2024-11-13 14:59       ` Lorenzo Stoakes
2024-11-13 15:01     ` Lorenzo Stoakes
2024-11-13 15:45       ` Suren Baghdasaryan
2024-11-13 15:42     ` Suren Baghdasaryan
2024-11-12 19:46 ` [PATCH v2 3/5] mm: mark vma as detached until it's added into vma tree Suren Baghdasaryan
2024-11-13 14:43   ` Lorenzo Stoakes
2024-11-13 15:37     ` Suren Baghdasaryan
2024-11-12 19:46 ` Suren Baghdasaryan [this message]
2024-11-13  2:57   ` [PATCH v2 4/5] mm: make vma cache SLAB_TYPESAFE_BY_RCU Suren Baghdasaryan
2024-11-13  5:08     ` Hugh Dickins
2024-11-13  6:03       ` Suren Baghdasaryan
2024-11-13  6:52         ` Hugh Dickins
2024-11-13  8:19           ` Suren Baghdasaryan
2024-11-13  8:58   ` Vlastimil Babka
2024-11-13 12:38     ` Liam R. Howlett
2024-11-13 13:57       ` Matthew Wilcox
2024-11-13 15:22         ` Liam R. Howlett
2024-11-13 15:25           ` Suren Baghdasaryan
2024-11-13 15:29             ` Liam R. Howlett
2024-11-13 15:47               ` Suren Baghdasaryan
2024-11-13 19:05                 ` Suren Baghdasaryan
2024-11-14 16:18                   ` Suren Baghdasaryan
2024-11-14 16:21                     ` Vlastimil Babka
2024-11-13 16:44           ` Jann Horn
2024-11-13 20:59             ` Matthew Wilcox
2024-11-13 21:23               ` Jann Horn
2024-11-12 19:46 ` [PATCH v2 5/5] docs/mm: document latest changes to vm_lock Suren Baghdasaryan
2024-11-12 19:51   ` Suren Baghdasaryan
2024-11-13 14:46     ` Lorenzo Stoakes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241112194635.444146-5-surenb@google.com \
    --to=surenb@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=brauner@kernel.org \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dhowells@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hdanton@sina.com \
    --cc=hughd@google.com \
    --cc=jannh@google.com \
    --cc=kernel-team@android.com \
    --cc=liam.howlett@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@suse.com \
    --cc=minchan@google.com \
    --cc=mjguzik@gmail.com \
    --cc=oleg@redhat.com \
    --cc=oliver.sang@intel.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=paulmck@kernel.org \
    --cc=peterx@redhat.com \
    --cc=shakeel.butt@linux.dev \
    --cc=souravpanda@google.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.