linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Lin, Ming" <minggr@gmail.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Simon Ser <contact@emersion.fr>
Cc: Peter Xu <peterx@redhat.com>,
	"Kirill A. Shutemov" <kirill@shutemov.name>,
	Matthew Wilcox <willy@infradead.org>,
	Dan Williams <dan.j.williams@intel.com>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Will Deacon <will@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	David Herrmann <dh.herrmann@gmail.com>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	Greg Kroah-Hartman <greg@kroah.com>,
	"tytso@mit.edu" <tytso@mit.edu>
Subject: Re: Sealed memfd & no-fault mmap
Date: Fri, 28 May 2021 10:07:02 -0700	[thread overview]
Message-ID: <7718ec5b-0a9e-ffa6-16f2-bc0b6afbd9ab@gmail.com> (raw)
In-Reply-To: <CAHk-=wiY1BL-UHPMEAbd7nY3vu6w41A1hhvjg1DoBXWuRt9_qw@mail.gmail.com>

On 5/5/2021 11:42 AM, Linus Torvalds wrote:
> On Wed, May 5, 2021 at 3:21 AM Simon Ser <contact@emersion.fr> wrote:
>>>
>>> Is there some very specific and targeted pattern for that "shared
>>> mapping" case? For example, if it's always a shared anonymous mapping
>>> with no filesystem backing, then that would possibly be a simpler case
>>> than the "random arbitrary shared file descriptor".
>>
>> Yes. I don't know of any Wayland client using buffers with real
>> filesystem backing. I think the main cases are:
>>
>> - shm_open(3) immediately followed by shm_unlink(3). On Linux, this is
>>    implemented with /dev/shm which is a tmpfs.
>> - Abusing /tmp or /run's tmpfs by creating a file there and unlinking
>>    it immediately afterwards. Kind of similar to the first case.
>> - memfd_create(2) on Linux.
>>
>> Is this enough to make it work on shared memory mappings? Is it
>> important that the mapping is anonymous?
> 
> All of those should be anonymous in the sense that the backing store
> is all the kernel's notion of anonymous pages, and there is no actual
> file backing. The mappings may then be shared, of course.
> 
> So that does make Peter's idea to have some inode flag for "don't
> SIGBUS on fault" be more reasonable, because there isn't some random
> actual filesystem involved, only the core VM layer.
> 
> I'm not going to write the patch, though, but maybe you can convince
> somebody else to try it..

Is something like the following draft patch on the right track?

1. The application sets the S_NOFAULT flag on the shm mmap fd:

	#define S_NOFAULT       (1 << 17)
         fd = shm_open(shmpath, O_RDONLY, S_IRUSR | S_IWUSR);
         ioctl(fd, FS_IOC_GETFLAGS, &flags);
         flags |= S_NOFAULT;
         ioctl(fd, FS_IOC_SETFLAGS, &flags)

2. Don't raise SIGBUS on a read beyond i_size if the S_NOFAULT flag is
    set on the inode; map the zero page instead.

---

[RFC DRAFT PATCH] shm: no SIGBUS fault on out-of-bounds mmap read
---
  include/linux/fs.h |  2 ++
  mm/shmem.c         | 44 +++++++++++++++++++++++++++++++++++++++++++-
  2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..a9be7cd71b94 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2202,6 +2202,7 @@ struct super_operations {
  #define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
  #define S_CASEFOLD	(1 << 15) /* Casefolded file */
  #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
+#define S_NOFAULT	(1 << 17) /* No SIGBUS fault on out-of-band mmap read */
  
  /*
   * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2244,6 +2245,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
  #define IS_ENCRYPTED(inode)	((inode)->i_flags & S_ENCRYPTED)
  #define IS_CASEFOLDED(inode)	((inode)->i_flags & S_CASEFOLD)
  #define IS_VERITY(inode)	((inode)->i_flags & S_VERITY)
+#define IS_NOFAULT(inode)	((inode)->i_flags & S_NOFAULT)
  
  #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
  				 (inode)->i_rdev == WHITEOUT_DEV)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5d46611cba8d..856d2d8d4cdf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -38,8 +38,11 @@
  #include <linux/hugetlb.h>
  #include <linux/frontswap.h>
  #include <linux/fs_parser.h>
+#include <linux/fs.h>
+#include <linux/fileattr.h>
  
  #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
+#include <asm/pgalloc.h>
  
  static struct vfsmount *shm_mnt;
  
@@ -1812,7 +1815,27 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
  repeat:
  	if (sgp <= SGP_CACHE &&
  	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
-		return -EINVAL;
+		unsigned long dst_addr = vmf->address;
+		pte_t _dst_pte, *dst_pte;
+		spinlock_t *ptl;
+		int ret;
+
+		if (!IS_NOFAULT(inode))
+			return -EINVAL;
+
+		_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
+					 vma->vm_page_prot));
+		dst_pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, dst_addr, &ptl);
+		ret = -EEXIST;
+		if (!pte_none(*dst_pte))
+			goto out_unlock;
+		set_pte_at(vma->vm_mm, dst_addr, dst_pte, _dst_pte);
+		update_mmu_cache(vma, dst_addr, dst_pte);
+		*fault_type = VM_FAULT_NOPAGE;
+		ret = 0;
+out_unlock:
+		pte_unmap_unlock(dst_pte, ptl);
+		return ret;
  	}
  
  	sbinfo = SHMEM_SB(inode->i_sb);
@@ -3819,6 +3842,23 @@ const struct address_space_operations shmem_aops = {
  };
  EXPORT_SYMBOL(shmem_aops);
  
+static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+
+	fileattr_fill_flags(fa, inode->i_flags);
+
+	return 0;
+}
+
+static int shmem_fileattr_set(struct user_namespace *mnt_userns,
+			      struct dentry *dentry, struct fileattr *fa)
+{
+	struct inode *inode = d_inode(dentry);
+	inode->i_flags = fa->flags;
+	return 0;
+}
+
  static const struct file_operations shmem_file_operations = {
  	.mmap		= shmem_mmap,
  	.get_unmapped_area = shmem_get_unmapped_area,
@@ -3836,6 +3876,8 @@ static const struct file_operations shmem_file_operations = {
  static const struct inode_operations shmem_inode_operations = {
  	.getattr	= shmem_getattr,
  	.setattr	= shmem_setattr,
+	.fileattr_get	= shmem_fileattr_get,
+	.fileattr_set	= shmem_fileattr_set,
  #ifdef CONFIG_TMPFS_XATTR
  	.listxattr	= shmem_listxattr,
  	.set_acl	= simple_set_acl,


  reply	other threads:[~2021-05-28 17:07 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-27  8:24 Sealed memfd & no-fault mmap Simon Ser
2021-04-27 16:51 ` Linus Torvalds
2021-04-29 15:48   ` Kirill A. Shutemov
2021-04-29 18:38     ` Peter Xu
2021-05-04  9:29       ` Simon Ser
2021-05-04 16:08         ` Linus Torvalds
2021-05-05 10:21           ` Simon Ser
2021-05-05 18:42             ` Linus Torvalds
2021-05-28 17:07               ` Lin, Ming [this message]
2021-05-29  1:03                 ` Linus Torvalds
2021-05-29  7:31                   ` Lin, Ming
2021-05-29 15:44                     ` Linus Torvalds
2021-05-29 20:15                       ` Hugh Dickins
2021-05-29 23:36                         ` Ming Lin
2021-05-31 21:13                           ` Ming Lin
2021-06-01  6:24                             ` Linus Torvalds
2021-06-01  7:08                               ` Ming Lin
2021-06-03 13:01                                 ` Simon Ser
2021-06-03 20:07                                   ` Ming Lin
2021-06-03 20:49                                     ` Simon Ser
2021-06-03 13:14                         ` Simon Ser
2021-06-03 13:57                           ` Matthew Wilcox
2021-06-03 14:48                             ` Simon Ser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7718ec5b-0a9e-ffa6-16f2-bc0b6afbd9ab@gmail.com \
    --to=minggr@gmail.com \
    --cc=contact@emersion.fr \
    --cc=dan.j.williams@intel.com \
    --cc=dh.herrmann@gmail.com \
    --cc=greg@kroah.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=peterx@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).