diff for duplicates of <20241106120323.519951776@linuxfoundation.org> diff --git a/a/1.txt b/N1/1.txt index 67c7c0b..8e4ed29 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,46 +1,155 @@ -4.19-stable review patch. If anyone has any objections, please let me know. +6.11-stable review patch. If anyone has any objections, please let me know. ------------------ -From: Hailey Mothershead <hailmo@amazon.com> +From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> -commit 23e4099bdc3c8381992f9eb975c79196d6755210 upstream. +[ Upstream commit f64e67e5d3a45a4a04286c47afade4b518acd47b ] -I.G 9.7.B for FIPS 140-3 specifies that variables temporarily holding -cryptographic information should be zeroized once they are no longer -needed. Accomplish this by using kfree_sensitive for buffers that -previously held the private key. +Patch series "fork: do not expose incomplete mm on fork". -Signed-off-by: Hailey Mothershead <hailmo@amazon.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com> -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +During fork we may place the virtual memory address space into an +inconsistent state before the fork operation is complete. + +In addition, we may encounter an error during the fork operation that +indicates that the virtual memory address space is invalidated. + +As a result, we should not be exposing it in any way to external machinery +that might interact with the mm or VMAs, machinery that is not designed to +deal with incomplete state. + +We specifically update the fork logic to defer khugepaged and ksm to the +end of the operation and only to be invoked if no error arose, and +disallow uffd from observing fork events should an error have occurred. + +This patch (of 2): + +Currently on fork we expose the virtual address space of a process to +userland unconditionally if uffd is registered in VMAs, regardless of +whether an error arose in the fork. + +This is performed in dup_userfaultfd_complete() which is invoked +unconditionally, and performs two duties - invoking registered handlers +for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down +userfaultfd_fork_ctx objects established in dup_userfaultfd(). + +This is problematic, because the virtual address space may not yet be +correctly initialised if an error arose. + +The change in commit d24062914837 ("fork: use __mt_dup() to duplicate +maple tree in dup_mmap()") makes this more pertinent as we may be in a +state where entries in the maple tree are not yet consistent. + +We address this by, on fork error, ensuring that we roll back state that +we would otherwise expect to clean up through the event being handled by +userland and perform the memory freeing duty otherwise performed by +dup_userfaultfd_complete(). + +We do this by implementing a new function, dup_userfaultfd_fail(), which +performs the same loop, only decrementing reference counts. + +Note that we perform mmgrab() on the parent and child mm's, however +userfaultfd_ctx_put() will mmdrop() this once the reference count drops to +zero, so we will avoid memory leaks correctly here. + +Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com +Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com +Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()") +Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +Reported-by: Jann Horn <jannh@google.com> +Reviewed-by: Jann Horn <jannh@google.com> +Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com> +Cc: Alexander Viro <viro@zeniv.linux.org.uk> +Cc: Christian Brauner <brauner@kernel.org> +Cc: Jan Kara <jack@suse.cz> +Cc: Linus Torvalds <torvalds@linuxfoundation.org> +Cc: Vlastimil Babka <vbabka@suse.cz> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> --- - crypto/aead.c | 3 +-- - crypto/cipher.c | 3 +-- - 2 files changed, 2 insertions(+), 4 deletions(-) - ---- a/crypto/aead.c -+++ b/crypto/aead.c -@@ -45,8 +45,7 @@ static int setkey_unaligned(struct crypt - alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); - memcpy(alignbuffer, key, keylen); - ret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen); -- memset(alignbuffer, 0, keylen); -- kfree(buffer); -+ kzfree(buffer); - return ret; + fs/userfaultfd.c | 28 ++++++++++++++++++++++++++++ + include/linux/userfaultfd_k.h | 5 +++++ + kernel/fork.c | 5 ++++- + 3 files changed, 37 insertions(+), 1 deletion(-) + +diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c +index 27a3e9285fbf6..2f302da629cb4 100644 +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -731,6 +731,34 @@ void dup_userfaultfd_complete(struct list_head *fcs) + } } ---- a/crypto/cipher.c -+++ b/crypto/cipher.c -@@ -38,8 +38,7 @@ static int setkey_unaligned(struct crypt - alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); - memcpy(alignbuffer, key, keylen); - ret = cia->cia_setkey(tfm, alignbuffer, keylen); -- memset(alignbuffer, 0, keylen); -- kfree(buffer); -+ kzfree(buffer); - return ret; ++void dup_userfaultfd_fail(struct list_head *fcs) ++{ ++ struct userfaultfd_fork_ctx *fctx, *n; ++ ++ /* ++ * An error has occurred on fork, we will tear memory down, but have ++ * allocated memory for fctx's and raised reference counts for both the ++ * original and child contexts (and on the mm for each as a result). ++ * ++ * These would ordinarily be taken care of by a user handling the event, ++ * but we are no longer doing so, so manually clean up here. ++ * ++ * mm tear down will take care of cleaning up VMA contexts. ++ */ ++ list_for_each_entry_safe(fctx, n, fcs, list) { ++ struct userfaultfd_ctx *octx = fctx->orig; ++ struct userfaultfd_ctx *ctx = fctx->new; ++ ++ atomic_dec(&octx->mmap_changing); ++ VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0); ++ userfaultfd_ctx_put(octx); ++ userfaultfd_ctx_put(ctx); ++ ++ list_del(&fctx->list); ++ kfree(fctx); ++ } ++} ++ + void mremap_userfaultfd_prep(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx *vm_ctx) + { +diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h +index a12bcf042551e..f4a45a37229ad 100644 +--- a/include/linux/userfaultfd_k.h ++++ b/include/linux/userfaultfd_k.h +@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, + + extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); + extern void dup_userfaultfd_complete(struct list_head *); ++void dup_userfaultfd_fail(struct list_head *); + extern void mremap_userfaultfd_prep(struct vm_area_struct *, + struct vm_userfaultfd_ctx *); +@@ -332,6 +333,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l) + { } + ++static inline void dup_userfaultfd_fail(struct list_head *l) ++{ ++} ++ + static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx *ctx) + { +diff --git a/kernel/fork.c b/kernel/fork.c +index dbf3c5d81df3b..6423ce60b8f97 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -775,7 +775,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, + mmap_write_unlock(mm); + flush_tlb_mm(oldmm); + mmap_write_unlock(oldmm); +- dup_userfaultfd_complete(&uf); ++ if (!retval) ++ dup_userfaultfd_complete(&uf); ++ else ++ dup_userfaultfd_fail(&uf); + fail_uprobe_end: + uprobe_end_dup_mmap(); + return retval; +-- +2.43.0 diff --git a/a/content_digest b/N1/content_digest index d0c3e27..70fb86e 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -1,60 +1,176 @@ - "ref\020241106120320.865793091@linuxfoundation.org\0" + "ref\020241106120319.234238499@linuxfoundation.org\0" "From\0Greg Kroah-Hartman <gregkh@linuxfoundation.org>\0" - "Subject\0[PATCH 4.19 106/350] crypto: aead,cipher - zeroize key buffer after use\0" - "Date\0Wed, 6 Nov 2024 13:00:34 +0100\0" + "Subject\0[PATCH 6.11 174/245] fork: do not invoke uffd on fork if error occurs\0" + "Date\0Wed, 6 Nov 2024 13:03:47 +0100\0" "To\0stable@vger.kernel.org\0" "Cc\0Greg Kroah-Hartman <gregkh@linuxfoundation.org>" patches@lists.linux.dev - Hailey Mothershead <hailmo@amazon.com> - Herbert Xu <herbert@gondor.apana.org.au> - " Hugo SIMELIERE <hsimeliere.opensource@witekio.com>\0" + Lorenzo Stoakes <lorenzo.stoakes@oracle.com> + Jann Horn <jannh@google.com> + Liam R. Howlett <Liam.Howlett@oracle.com> + Alexander Viro <viro@zeniv.linux.org.uk> + Christian Brauner <brauner@kernel.org> + Jan Kara <jack@suse.cz> + Linus Torvalds <torvalds@linuxfoundation.org> + Vlastimil Babka <vbabka@suse.cz> + Andrew Morton <akpm@linux-foundation.org> + " Sasha Levin <sashal@kernel.org>\0" "\00:1\0" "b\0" - "4.19-stable review patch. If anyone has any objections, please let me know.\n" + "6.11-stable review patch. If anyone has any objections, please let me know.\n" "\n" "------------------\n" "\n" - "From: Hailey Mothershead <hailmo@amazon.com>\n" + "From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>\n" "\n" - "commit 23e4099bdc3c8381992f9eb975c79196d6755210 upstream.\n" + "[ Upstream commit f64e67e5d3a45a4a04286c47afade4b518acd47b ]\n" "\n" - "I.G 9.7.B for FIPS 140-3 specifies that variables temporarily holding\n" - "cryptographic information should be zeroized once they are no longer\n" - "needed. Accomplish this by using kfree_sensitive for buffers that\n" - "previously held the private key.\n" + "Patch series \"fork: do not expose incomplete mm on fork\".\n" "\n" - "Signed-off-by: Hailey Mothershead <hailmo@amazon.com>\n" - "Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>\n" - "Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>\n" - "Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>\n" + "During fork we may place the virtual memory address space into an\n" + "inconsistent state before the fork operation is complete.\n" + "\n" + "In addition, we may encounter an error during the fork operation that\n" + "indicates that the virtual memory address space is invalidated.\n" + "\n" + "As a result, we should not be exposing it in any way to external machinery\n" + "that might interact with the mm or VMAs, machinery that is not designed to\n" + "deal with incomplete state.\n" + "\n" + "We specifically update the fork logic to defer khugepaged and ksm to the\n" + "end of the operation and only to be invoked if no error arose, and\n" + "disallow uffd from observing fork events should an error have occurred.\n" + "\n" + "This patch (of 2):\n" + "\n" + "Currently on fork we expose the virtual address space of a process to\n" + "userland unconditionally if uffd is registered in VMAs, regardless of\n" + "whether an error arose in the fork.\n" + "\n" + "This is performed in dup_userfaultfd_complete() which is invoked\n" + "unconditionally, and performs two duties - invoking registered handlers\n" + "for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down\n" + "userfaultfd_fork_ctx objects established in dup_userfaultfd().\n" + "\n" + "This is problematic, because the virtual address space may not yet be\n" + "correctly initialised if an error arose.\n" + "\n" + "The change in commit d24062914837 (\"fork: use __mt_dup() to duplicate\n" + "maple tree in dup_mmap()\") makes this more pertinent as we may be in a\n" + "state where entries in the maple tree are not yet consistent.\n" + "\n" + "We address this by, on fork error, ensuring that we roll back state that\n" + "we would otherwise expect to clean up through the event being handled by\n" + "userland and perform the memory freeing duty otherwise performed by\n" + "dup_userfaultfd_complete().\n" + "\n" + "We do this by implementing a new function, dup_userfaultfd_fail(), which\n" + "performs the same loop, only decrementing reference counts.\n" + "\n" + "Note that we perform mmgrab() on the parent and child mm's, however\n" + "userfaultfd_ctx_put() will mmdrop() this once the reference count drops to\n" + "zero, so we will avoid memory leaks correctly here.\n" + "\n" + "Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com\n" + "Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com\n" + "Fixes: d24062914837 (\"fork: use __mt_dup() to duplicate maple tree in dup_mmap()\")\n" + "Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>\n" + "Reported-by: Jann Horn <jannh@google.com>\n" + "Reviewed-by: Jann Horn <jannh@google.com>\n" + "Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>\n" + "Cc: Alexander Viro <viro@zeniv.linux.org.uk>\n" + "Cc: Christian Brauner <brauner@kernel.org>\n" + "Cc: Jan Kara <jack@suse.cz>\n" + "Cc: Linus Torvalds <torvalds@linuxfoundation.org>\n" + "Cc: Vlastimil Babka <vbabka@suse.cz>\n" + "Cc: <stable@vger.kernel.org>\n" + "Signed-off-by: Andrew Morton <akpm@linux-foundation.org>\n" + "Signed-off-by: Sasha Levin <sashal@kernel.org>\n" "---\n" - " crypto/aead.c | 3 +--\n" - " crypto/cipher.c | 3 +--\n" - " 2 files changed, 2 insertions(+), 4 deletions(-)\n" - "\n" - "--- a/crypto/aead.c\n" - "+++ b/crypto/aead.c\n" - "@@ -45,8 +45,7 @@ static int setkey_unaligned(struct crypt\n" - " \talignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);\n" - " \tmemcpy(alignbuffer, key, keylen);\n" - " \tret = crypto_aead_alg(tfm)->setkey(tfm, alignbuffer, keylen);\n" - "-\tmemset(alignbuffer, 0, keylen);\n" - "-\tkfree(buffer);\n" - "+\tkzfree(buffer);\n" - " \treturn ret;\n" + " fs/userfaultfd.c | 28 ++++++++++++++++++++++++++++\n" + " include/linux/userfaultfd_k.h | 5 +++++\n" + " kernel/fork.c | 5 ++++-\n" + " 3 files changed, 37 insertions(+), 1 deletion(-)\n" + "\n" + "diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c\n" + "index 27a3e9285fbf6..2f302da629cb4 100644\n" + "--- a/fs/userfaultfd.c\n" + "+++ b/fs/userfaultfd.c\n" + "@@ -731,6 +731,34 @@ void dup_userfaultfd_complete(struct list_head *fcs)\n" + " \t}\n" " }\n" " \n" - "--- a/crypto/cipher.c\n" - "+++ b/crypto/cipher.c\n" - "@@ -38,8 +38,7 @@ static int setkey_unaligned(struct crypt\n" - " \talignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);\n" - " \tmemcpy(alignbuffer, key, keylen);\n" - " \tret = cia->cia_setkey(tfm, alignbuffer, keylen);\n" - "-\tmemset(alignbuffer, 0, keylen);\n" - "-\tkfree(buffer);\n" - "+\tkzfree(buffer);\n" - " \treturn ret;\n" + "+void dup_userfaultfd_fail(struct list_head *fcs)\n" + "+{\n" + "+\tstruct userfaultfd_fork_ctx *fctx, *n;\n" + "+\n" + "+\t/*\n" + "+\t * An error has occurred on fork, we will tear memory down, but have\n" + "+\t * allocated memory for fctx's and raised reference counts for both the\n" + "+\t * original and child contexts (and on the mm for each as a result).\n" + "+\t *\n" + "+\t * These would ordinarily be taken care of by a user handling the event,\n" + "+\t * but we are no longer doing so, so manually clean up here.\n" + "+\t *\n" + "+\t * mm tear down will take care of cleaning up VMA contexts.\n" + "+\t */\n" + "+\tlist_for_each_entry_safe(fctx, n, fcs, list) {\n" + "+\t\tstruct userfaultfd_ctx *octx = fctx->orig;\n" + "+\t\tstruct userfaultfd_ctx *ctx = fctx->new;\n" + "+\n" + "+\t\tatomic_dec(&octx->mmap_changing);\n" + "+\t\tVM_BUG_ON(atomic_read(&octx->mmap_changing) < 0);\n" + "+\t\tuserfaultfd_ctx_put(octx);\n" + "+\t\tuserfaultfd_ctx_put(ctx);\n" + "+\n" + "+\t\tlist_del(&fctx->list);\n" + "+\t\tkfree(fctx);\n" + "+\t}\n" + "+}\n" + "+\n" + " void mremap_userfaultfd_prep(struct vm_area_struct *vma,\n" + " \t\t\t struct vm_userfaultfd_ctx *vm_ctx)\n" + " {\n" + "diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h\n" + "index a12bcf042551e..f4a45a37229ad 100644\n" + "--- a/include/linux/userfaultfd_k.h\n" + "+++ b/include/linux/userfaultfd_k.h\n" + "@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,\n" + " \n" + " extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);\n" + " extern void dup_userfaultfd_complete(struct list_head *);\n" + "+void dup_userfaultfd_fail(struct list_head *);\n" + " \n" + " extern void mremap_userfaultfd_prep(struct vm_area_struct *,\n" + " \t\t\t\t struct vm_userfaultfd_ctx *);\n" + "@@ -332,6 +333,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l)\n" + " {\n" + " }\n" " \n" - } + "+static inline void dup_userfaultfd_fail(struct list_head *l)\n" + "+{\n" + "+}\n" + "+\n" + " static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,\n" + " \t\t\t\t\t struct vm_userfaultfd_ctx *ctx)\n" + " {\n" + "diff --git a/kernel/fork.c b/kernel/fork.c\n" + "index dbf3c5d81df3b..6423ce60b8f97 100644\n" + "--- a/kernel/fork.c\n" + "+++ b/kernel/fork.c\n" + "@@ -775,7 +775,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,\n" + " \tmmap_write_unlock(mm);\n" + " \tflush_tlb_mm(oldmm);\n" + " \tmmap_write_unlock(oldmm);\n" + "-\tdup_userfaultfd_complete(&uf);\n" + "+\tif (!retval)\n" + "+\t\tdup_userfaultfd_complete(&uf);\n" + "+\telse\n" + "+\t\tdup_userfaultfd_fail(&uf);\n" + " fail_uprobe_end:\n" + " \tuprobe_end_dup_mmap();\n" + " \treturn retval;\n" + "-- \n" + 2.43.0 -33ca6d2101a8cf57c9c5c5d38ae843d66adea3b0706575f5609a428fb9339ce5 +046a6e9de07ac612b07b879e7fded3f248fc1959b12402d015fdfdd7c4ed2a58
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox