Archive-only list for patches
 help / color / mirror / Atom feed
diff for duplicates of <20241106120323.519951776@linuxfoundation.org>

diff --git a/a/1.txt b/N1/1.txt
index 67c7c0b..8e4ed29 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1,46 +1,155 @@
-4.19-stable review patch.  If anyone has any objections, please let me know.
+6.11-stable review patch.  If anyone has any objections, please let me know.
 
 ------------------
 
-From: Hailey Mothershead <hailmo@amazon.com>
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
 
-commit 23e4099bdc3c8381992f9eb975c79196d6755210 upstream.
+[ Upstream commit f64e67e5d3a45a4a04286c47afade4b518acd47b ]
 
-I.G 9.7.B for FIPS 140-3 specifies that variables temporarily holding
-cryptographic information should be zeroized once they are no longer
-needed. Accomplish this by using kfree_sensitive for buffers that
-previously held the private key.
+Patch series "fork: do not expose incomplete mm on fork".
 
-Signed-off-by: Hailey Mothershead <hailmo@amazon.com>
-Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+During fork we may place the virtual memory address space into an
+inconsistent state before the fork operation is complete.
+
+In addition, we may encounter an error during the fork operation that
+indicates that the virtual memory address space is invalidated.
+
+As a result, we should not be exposing it in any way to external machinery
+that might interact with the mm or VMAs, machinery that is not designed to
+deal with incomplete state.
+
+We specifically update the fork logic to defer khugepaged and ksm to the
+end of the operation and only to be invoked if no error arose, and
+disallow uffd from observing fork events should an error have occurred.
+
+This patch (of 2):
+
+Currently on fork we expose the virtual address space of a process to
+userland unconditionally if uffd is registered in VMAs, regardless of
+whether an error arose in the fork.
+
+This is performed in dup_userfaultfd_complete() which is invoked
+unconditionally, and performs two duties - invoking registered handlers
+for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down
+userfaultfd_fork_ctx objects established in dup_userfaultfd().
+
+This is problematic, because the virtual address space may not yet be
+correctly initialised if an error arose.
+
+The change in commit d24062914837 ("fork: use __mt_dup() to duplicate
+maple tree in dup_mmap()") makes this more pertinent as we may be in a
+state where entries in the maple tree are not yet consistent.
+
+We address this by, on fork error, ensuring that we roll back state that
+we would otherwise expect to clean up through the event being handled by
+userland and perform the memory freeing duty otherwise performed by
+dup_userfaultfd_complete().
+
+We do this by implementing a new function, dup_userfaultfd_fail(), which
+performs the same loop, only decrementing reference counts.
+
+Note that we perform mmgrab() on the parent and child mm's, however
+userfaultfd_ctx_put() will mmdrop() this once the reference count drops to
+zero, so we will avoid memory leaks correctly here.
+
+Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com
+Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com
+Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reported-by: Jann Horn <jannh@google.com>
+Reviewed-by: Jann Horn <jannh@google.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
 ---
- crypto/aead.c   |    3 +--
- crypto/cipher.c |    3 +--
- 2 files changed, 2 insertions(+), 4 deletions(-)
-
---- a/crypto/aead.c
-+++ b/crypto/aead.c
-@@ -45,8 +45,7 @@ static int setkey_unaligned(struct crypt
- 	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
- 	memcpy(alignbuffer, key, keylen);
- 	ret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen);
--	memset(alignbuffer, 0, keylen);
--	kfree(buffer);
-+	kzfree(buffer);
- 	return ret;
+ fs/userfaultfd.c              | 28 ++++++++++++++++++++++++++++
+ include/linux/userfaultfd_k.h |  5 +++++
+ kernel/fork.c                 |  5 ++++-
+ 3 files changed, 37 insertions(+), 1 deletion(-)
+
+diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
+index 27a3e9285fbf6..2f302da629cb4 100644
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -731,6 +731,34 @@ void dup_userfaultfd_complete(struct list_head *fcs)
+ 	}
  }
  
---- a/crypto/cipher.c
-+++ b/crypto/cipher.c
-@@ -38,8 +38,7 @@ static int setkey_unaligned(struct crypt
- 	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
- 	memcpy(alignbuffer, key, keylen);
- 	ret = cia->cia_setkey(tfm, alignbuffer, keylen);
--	memset(alignbuffer, 0, keylen);
--	kfree(buffer);
-+	kzfree(buffer);
- 	return ret;
++void dup_userfaultfd_fail(struct list_head *fcs)
++{
++	struct userfaultfd_fork_ctx *fctx, *n;
++
++	/*
++	 * An error has occurred on fork, we will tear memory down, but have
++	 * allocated memory for fctx's and raised reference counts for both the
++	 * original and child contexts (and on the mm for each as a result).
++	 *
++	 * These would ordinarily be taken care of by a user handling the event,
++	 * but we are no longer doing so, so manually clean up here.
++	 *
++	 * mm tear down will take care of cleaning up VMA contexts.
++	 */
++	list_for_each_entry_safe(fctx, n, fcs, list) {
++		struct userfaultfd_ctx *octx = fctx->orig;
++		struct userfaultfd_ctx *ctx = fctx->new;
++
++		atomic_dec(&octx->mmap_changing);
++		VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0);
++		userfaultfd_ctx_put(octx);
++		userfaultfd_ctx_put(ctx);
++
++		list_del(&fctx->list);
++		kfree(fctx);
++	}
++}
++
+ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+ 			     struct vm_userfaultfd_ctx *vm_ctx)
+ {
+diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
+index a12bcf042551e..f4a45a37229ad 100644
+--- a/include/linux/userfaultfd_k.h
++++ b/include/linux/userfaultfd_k.h
+@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
+ 
+ extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
+ extern void dup_userfaultfd_complete(struct list_head *);
++void dup_userfaultfd_fail(struct list_head *);
  
+ extern void mremap_userfaultfd_prep(struct vm_area_struct *,
+ 				    struct vm_userfaultfd_ctx *);
+@@ -332,6 +333,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l)
+ {
  }
+ 
++static inline void dup_userfaultfd_fail(struct list_head *l)
++{
++}
++
+ static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+ 					   struct vm_userfaultfd_ctx *ctx)
+ {
+diff --git a/kernel/fork.c b/kernel/fork.c
+index dbf3c5d81df3b..6423ce60b8f97 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -775,7 +775,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+ 	mmap_write_unlock(mm);
+ 	flush_tlb_mm(oldmm);
+ 	mmap_write_unlock(oldmm);
+-	dup_userfaultfd_complete(&uf);
++	if (!retval)
++		dup_userfaultfd_complete(&uf);
++	else
++		dup_userfaultfd_fail(&uf);
+ fail_uprobe_end:
+ 	uprobe_end_dup_mmap();
+ 	return retval;
+-- 
+2.43.0
diff --git a/a/content_digest b/N1/content_digest
index d0c3e27..70fb86e 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -1,60 +1,176 @@
- "ref\020241106120320.865793091@linuxfoundation.org\0"
+ "ref\020241106120319.234238499@linuxfoundation.org\0"
  "From\0Greg Kroah-Hartman <gregkh@linuxfoundation.org>\0"
- "Subject\0[PATCH 4.19 106/350] crypto: aead,cipher - zeroize key buffer after use\0"
- "Date\0Wed,  6 Nov 2024 13:00:34 +0100\0"
+ "Subject\0[PATCH 6.11 174/245] fork: do not invoke uffd on fork if error occurs\0"
+ "Date\0Wed,  6 Nov 2024 13:03:47 +0100\0"
  "To\0stable@vger.kernel.org\0"
  "Cc\0Greg Kroah-Hartman <gregkh@linuxfoundation.org>"
   patches@lists.linux.dev
-  Hailey Mothershead <hailmo@amazon.com>
-  Herbert Xu <herbert@gondor.apana.org.au>
- " Hugo SIMELIERE <hsimeliere.opensource@witekio.com>\0"
+  Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+  Jann Horn <jannh@google.com>
+  Liam R. Howlett <Liam.Howlett@oracle.com>
+  Alexander Viro <viro@zeniv.linux.org.uk>
+  Christian Brauner <brauner@kernel.org>
+  Jan Kara <jack@suse.cz>
+  Linus Torvalds <torvalds@linuxfoundation.org>
+  Vlastimil Babka <vbabka@suse.cz>
+  Andrew Morton <akpm@linux-foundation.org>
+ " Sasha Levin <sashal@kernel.org>\0"
  "\00:1\0"
  "b\0"
- "4.19-stable review patch.  If anyone has any objections, please let me know.\n"
+ "6.11-stable review patch.  If anyone has any objections, please let me know.\n"
  "\n"
  "------------------\n"
  "\n"
- "From: Hailey Mothershead <hailmo@amazon.com>\n"
+ "From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>\n"
  "\n"
- "commit 23e4099bdc3c8381992f9eb975c79196d6755210 upstream.\n"
+ "[ Upstream commit f64e67e5d3a45a4a04286c47afade4b518acd47b ]\n"
  "\n"
- "I.G 9.7.B for FIPS 140-3 specifies that variables temporarily holding\n"
- "cryptographic information should be zeroized once they are no longer\n"
- "needed. Accomplish this by using kfree_sensitive for buffers that\n"
- "previously held the private key.\n"
+ "Patch series \"fork: do not expose incomplete mm on fork\".\n"
  "\n"
- "Signed-off-by: Hailey Mothershead <hailmo@amazon.com>\n"
- "Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>\n"
- "Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>\n"
- "Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>\n"
+ "During fork we may place the virtual memory address space into an\n"
+ "inconsistent state before the fork operation is complete.\n"
+ "\n"
+ "In addition, we may encounter an error during the fork operation that\n"
+ "indicates that the virtual memory address space is invalidated.\n"
+ "\n"
+ "As a result, we should not be exposing it in any way to external machinery\n"
+ "that might interact with the mm or VMAs, machinery that is not designed to\n"
+ "deal with incomplete state.\n"
+ "\n"
+ "We specifically update the fork logic to defer khugepaged and ksm to the\n"
+ "end of the operation and only to be invoked if no error arose, and\n"
+ "disallow uffd from observing fork events should an error have occurred.\n"
+ "\n"
+ "This patch (of 2):\n"
+ "\n"
+ "Currently on fork we expose the virtual address space of a process to\n"
+ "userland unconditionally if uffd is registered in VMAs, regardless of\n"
+ "whether an error arose in the fork.\n"
+ "\n"
+ "This is performed in dup_userfaultfd_complete() which is invoked\n"
+ "unconditionally, and performs two duties - invoking registered handlers\n"
+ "for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down\n"
+ "userfaultfd_fork_ctx objects established in dup_userfaultfd().\n"
+ "\n"
+ "This is problematic, because the virtual address space may not yet be\n"
+ "correctly initialised if an error arose.\n"
+ "\n"
+ "The change in commit d24062914837 (\"fork: use __mt_dup() to duplicate\n"
+ "maple tree in dup_mmap()\") makes this more pertinent as we may be in a\n"
+ "state where entries in the maple tree are not yet consistent.\n"
+ "\n"
+ "We address this by, on fork error, ensuring that we roll back state that\n"
+ "we would otherwise expect to clean up through the event being handled by\n"
+ "userland and perform the memory freeing duty otherwise performed by\n"
+ "dup_userfaultfd_complete().\n"
+ "\n"
+ "We do this by implementing a new function, dup_userfaultfd_fail(), which\n"
+ "performs the same loop, only decrementing reference counts.\n"
+ "\n"
+ "Note that we perform mmgrab() on the parent and child mm's, however\n"
+ "userfaultfd_ctx_put() will mmdrop() this once the reference count drops to\n"
+ "zero, so we will avoid memory leaks correctly here.\n"
+ "\n"
+ "Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com\n"
+ "Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com\n"
+ "Fixes: d24062914837 (\"fork: use __mt_dup() to duplicate maple tree in dup_mmap()\")\n"
+ "Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>\n"
+ "Reported-by: Jann Horn <jannh@google.com>\n"
+ "Reviewed-by: Jann Horn <jannh@google.com>\n"
+ "Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>\n"
+ "Cc: Alexander Viro <viro@zeniv.linux.org.uk>\n"
+ "Cc: Christian Brauner <brauner@kernel.org>\n"
+ "Cc: Jan Kara <jack@suse.cz>\n"
+ "Cc: Linus Torvalds <torvalds@linuxfoundation.org>\n"
+ "Cc: Vlastimil Babka <vbabka@suse.cz>\n"
+ "Cc: <stable@vger.kernel.org>\n"
+ "Signed-off-by: Andrew Morton <akpm@linux-foundation.org>\n"
+ "Signed-off-by: Sasha Levin <sashal@kernel.org>\n"
  "---\n"
- " crypto/aead.c   |    3 +--\n"
- " crypto/cipher.c |    3 +--\n"
- " 2 files changed, 2 insertions(+), 4 deletions(-)\n"
- "\n"
- "--- a/crypto/aead.c\n"
- "+++ b/crypto/aead.c\n"
- "@@ -45,8 +45,7 @@ static int setkey_unaligned(struct crypt\n"
- " \talignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);\n"
- " \tmemcpy(alignbuffer, key, keylen);\n"
- " \tret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen);\n"
- "-\tmemset(alignbuffer, 0, keylen);\n"
- "-\tkfree(buffer);\n"
- "+\tkzfree(buffer);\n"
- " \treturn ret;\n"
+ " fs/userfaultfd.c              | 28 ++++++++++++++++++++++++++++\n"
+ " include/linux/userfaultfd_k.h |  5 +++++\n"
+ " kernel/fork.c                 |  5 ++++-\n"
+ " 3 files changed, 37 insertions(+), 1 deletion(-)\n"
+ "\n"
+ "diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c\n"
+ "index 27a3e9285fbf6..2f302da629cb4 100644\n"
+ "--- a/fs/userfaultfd.c\n"
+ "+++ b/fs/userfaultfd.c\n"
+ "@@ -731,6 +731,34 @@ void dup_userfaultfd_complete(struct list_head *fcs)\n"
+ " \t}\n"
  " }\n"
  " \n"
- "--- a/crypto/cipher.c\n"
- "+++ b/crypto/cipher.c\n"
- "@@ -38,8 +38,7 @@ static int setkey_unaligned(struct crypt\n"
- " \talignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);\n"
- " \tmemcpy(alignbuffer, key, keylen);\n"
- " \tret = cia->cia_setkey(tfm, alignbuffer, keylen);\n"
- "-\tmemset(alignbuffer, 0, keylen);\n"
- "-\tkfree(buffer);\n"
- "+\tkzfree(buffer);\n"
- " \treturn ret;\n"
+ "+void dup_userfaultfd_fail(struct list_head *fcs)\n"
+ "+{\n"
+ "+\tstruct userfaultfd_fork_ctx *fctx, *n;\n"
+ "+\n"
+ "+\t/*\n"
+ "+\t * An error has occurred on fork, we will tear memory down, but have\n"
+ "+\t * allocated memory for fctx's and raised reference counts for both the\n"
+ "+\t * original and child contexts (and on the mm for each as a result).\n"
+ "+\t *\n"
+ "+\t * These would ordinarily be taken care of by a user handling the event,\n"
+ "+\t * but we are no longer doing so, so manually clean up here.\n"
+ "+\t *\n"
+ "+\t * mm tear down will take care of cleaning up VMA contexts.\n"
+ "+\t */\n"
+ "+\tlist_for_each_entry_safe(fctx, n, fcs, list) {\n"
+ "+\t\tstruct userfaultfd_ctx *octx = fctx->orig;\n"
+ "+\t\tstruct userfaultfd_ctx *ctx = fctx->new;\n"
+ "+\n"
+ "+\t\tatomic_dec(&octx->mmap_changing);\n"
+ "+\t\tVM_BUG_ON(atomic_read(&octx->mmap_changing) < 0);\n"
+ "+\t\tuserfaultfd_ctx_put(octx);\n"
+ "+\t\tuserfaultfd_ctx_put(ctx);\n"
+ "+\n"
+ "+\t\tlist_del(&fctx->list);\n"
+ "+\t\tkfree(fctx);\n"
+ "+\t}\n"
+ "+}\n"
+ "+\n"
+ " void mremap_userfaultfd_prep(struct vm_area_struct *vma,\n"
+ " \t\t\t     struct vm_userfaultfd_ctx *vm_ctx)\n"
+ " {\n"
+ "diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h\n"
+ "index a12bcf042551e..f4a45a37229ad 100644\n"
+ "--- a/include/linux/userfaultfd_k.h\n"
+ "+++ b/include/linux/userfaultfd_k.h\n"
+ "@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,\n"
+ " \n"
+ " extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);\n"
+ " extern void dup_userfaultfd_complete(struct list_head *);\n"
+ "+void dup_userfaultfd_fail(struct list_head *);\n"
+ " \n"
+ " extern void mremap_userfaultfd_prep(struct vm_area_struct *,\n"
+ " \t\t\t\t    struct vm_userfaultfd_ctx *);\n"
+ "@@ -332,6 +333,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l)\n"
+ " {\n"
+ " }\n"
  " \n"
-  }
+ "+static inline void dup_userfaultfd_fail(struct list_head *l)\n"
+ "+{\n"
+ "+}\n"
+ "+\n"
+ " static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,\n"
+ " \t\t\t\t\t   struct vm_userfaultfd_ctx *ctx)\n"
+ " {\n"
+ "diff --git a/kernel/fork.c b/kernel/fork.c\n"
+ "index dbf3c5d81df3b..6423ce60b8f97 100644\n"
+ "--- a/kernel/fork.c\n"
+ "+++ b/kernel/fork.c\n"
+ "@@ -775,7 +775,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,\n"
+ " \tmmap_write_unlock(mm);\n"
+ " \tflush_tlb_mm(oldmm);\n"
+ " \tmmap_write_unlock(oldmm);\n"
+ "-\tdup_userfaultfd_complete(&uf);\n"
+ "+\tif (!retval)\n"
+ "+\t\tdup_userfaultfd_complete(&uf);\n"
+ "+\telse\n"
+ "+\t\tdup_userfaultfd_fail(&uf);\n"
+ " fail_uprobe_end:\n"
+ " \tuprobe_end_dup_mmap();\n"
+ " \treturn retval;\n"
+ "-- \n"
+ 2.43.0
 
-33ca6d2101a8cf57c9c5c5d38ae843d66adea3b0706575f5609a428fb9339ce5
+046a6e9de07ac612b07b879e7fded3f248fc1959b12402d015fdfdd7c4ed2a58

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox