linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Khalid Aziz <khalid.aziz@oracle.com>
To: "Darrick J. Wong" <djwong@kernel.org>
Cc: akpm@linux-foundation.org, willy@infradead.org,
	aneesh.kumar@linux.ibm.com, arnd@arndb.de, 21cnbao@gmail.com,
	corbet@lwn.net, dave.hansen@linux.intel.com, david@redhat.com,
	ebiederm@xmission.com, hagen@jauu.net, jack@suse.cz,
	keescook@chromium.org, kirill@shutemov.name, kucharsk@gmail.com,
	linkinjeon@kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	longpeng2@huawei.com, luto@kernel.org, markhemm@googlemail.com,
	pcc@google.com, rppt@kernel.org, sieberf@amazon.com,
	sjpark@amazon.de, surenb@google.com, tst@schoebel-theuer.de,
	yzaikin@google.com
Subject: Re: [PATCH v2 6/9] mm/mshare: Add mmap operation
Date: Thu, 30 Jun 2022 17:30:35 -0600	[thread overview]
Message-ID: <75aaa2fc-f044-2096-bf6b-740b0075467e@oracle.com> (raw)
In-Reply-To: <Yr4ZKd2J8ucA/npV@magnolia>

On 6/30/22 15:44, Darrick J. Wong wrote:
> On Wed, Jun 29, 2022 at 04:53:57PM -0600, Khalid Aziz wrote:
>> mmap is used to establish address range for mshare region and map the
>> region into process's address space. Add basic mmap operation that
>> supports setting address range. Also fix code to not allocate new
>> mm_struct for files in msharefs that exist for information and not
>> for defining a new mshare region.
>>
>> Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> ---
>>   mm/mshare.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
>>   1 file changed, 41 insertions(+), 7 deletions(-)
>>
>> diff --git a/mm/mshare.c b/mm/mshare.c
>> index d238b68b0576..088a6cab1e93 100644
>> --- a/mm/mshare.c
>> +++ b/mm/mshare.c
>> @@ -9,7 +9,8 @@
>>    *
>>    *
>>    * Copyright (C) 2022 Oracle Corp. All rights reserved.
>> - * Author:	Khalid Aziz <khalid.aziz@oracle.com>
>> + * Authors:	Khalid Aziz <khalid.aziz@oracle.com>
>> + *		Matthew Wilcox <willy@infradead.org>
>>    *
>>    */
>>   
>> @@ -60,9 +61,36 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>>   	return ret;
>>   }
>>   
>> +static int
>> +msharefs_mmap(struct file *file, struct vm_area_struct *vma)
>> +{
>> +	struct mshare_data *info = file->private_data;
>> +	struct mm_struct *mm = info->mm;
>> +
>> +	/*
>> +	 * If this mshare region has been set up once already, bail out
>> +	 */
>> +	if (mm->mmap_base != 0)
>> +		return -EINVAL;
>> +
>> +	if ((vma->vm_start | vma->vm_end) & (PGDIR_SIZE - 1))
>> +		return -EINVAL;
>> +
>> +	mm->mmap_base = vma->vm_start;
>> +	mm->task_size = vma->vm_end - vma->vm_start;
>> +	if (!mm->task_size)
>> +		mm->task_size--;
>> +	info->minfo->start = mm->mmap_base;
>> +	info->minfo->size = mm->task_size;
> 
> So, uh, if the second mmap() caller decides to ignore the mshare_info,
> should they get an -EINVAL here since the memory mappings won't be at
> the same process virtual address?

Yes, that is in patch 9. Until patch 9, a second mmap will result in EINVAL irrespective of the address and size passed to mmap.

> 
>> +	vma->vm_flags |= VM_SHARED_PT;
>> +	vma->vm_private_data = info;
>> +	return 0;
>> +}
>> +
>>   static const struct file_operations msharefs_file_operations = {
>>   	.open		= msharefs_open,
>>   	.read_iter	= msharefs_read,
>> +	.mmap		= msharefs_mmap,
>>   	.llseek		= no_llseek,
>>   };
>>   
>> @@ -119,7 +147,12 @@ msharefs_fill_mm(struct inode *inode)
>>   		goto err_free;
>>   	}
>>   	info->mm = mm;
>> -	info->minfo = NULL;
>> +	info->minfo = kzalloc(sizeof(struct mshare_info), GFP_KERNEL);
>> +	if (info->minfo == NULL) {
>> +		retval = -ENOMEM;
>> +		goto err_free;
>> +	}
>> +
>>   	refcount_set(&info->refcnt, 1);
>>   	inode->i_private = info;
>>   
>> @@ -128,13 +161,14 @@ msharefs_fill_mm(struct inode *inode)
>>   err_free:
>>   	if (mm)
>>   		mmput(mm);
>> +	kfree(info->minfo);
>>   	kfree(info);
>>   	return retval;
>>   }
>>   
>>   static struct inode
>>   *msharefs_get_inode(struct super_block *sb, const struct inode *dir,
>> -			umode_t mode)
>> +			umode_t mode, bool newmm)
>>   {
>>   	struct inode *inode = new_inode(sb);
>>   	if (inode) {
>> @@ -147,7 +181,7 @@ static struct inode
>>   		case S_IFREG:
>>   			inode->i_op = &msharefs_file_inode_ops;
>>   			inode->i_fop = &msharefs_file_operations;
>> -			if (msharefs_fill_mm(inode) != 0) {
>> +			if (newmm && msharefs_fill_mm(inode) != 0) {
>>   				discard_new_inode(inode);
>>   				inode = ERR_PTR(-ENOMEM);
>>   			}
>> @@ -177,7 +211,7 @@ msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
>>   	struct inode *inode;
>>   	int err = 0;
>>   
>> -	inode = msharefs_get_inode(dir->i_sb, dir, mode);
>> +	inode = msharefs_get_inode(dir->i_sb, dir, mode, true);
>>   	if (IS_ERR(inode))
>>   		return PTR_ERR(inode);
>>   
>> @@ -267,7 +301,7 @@ prepopulate_files(struct super_block *s, struct inode *dir,
>>   		if (!dentry)
>>   			return -ENOMEM;
>>   
>> -		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
>> +		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode, false);
> 
> I was wondering why the information files were getting their own
> mshare_data.
> 
> TBH I'm not really sure what the difference is between mshare_data and
> mshare_info, since those names are not especially distinct.

mshare_data is the superset and is internal, while mshare_info is what is sent back to userspace when it reads a file 
representing an mshare region.

> 
>>   		if (!inode) {
>>   			dput(dentry);
>>   			return -ENOMEM;
>> @@ -301,7 +335,7 @@ msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
>>   	sb->s_d_op		= &msharefs_d_ops;
>>   	sb->s_time_gran		= 1;
>>   
>> -	inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777);
>> +	inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777, false);
> 
> Is it wise to default to world-writable?  Surely whatever userspace
> software wraps an msharefs can relax permissions as needed.
> 

Since this is for the root inode, the default is world-writable so that any process can create an mshare region in msharefs, which I think is 
the most flexible. An individual userspace app can create mshare regions with any permissions it deems fit using open(). Does 
that make sense?

Thanks,
Khalid

  reply	other threads:[~2022-06-30 23:32 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-29 22:53 [PATCH v2 0/9] Add support for shared PTEs across processes Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 1/9] mm: Add msharefs filesystem Khalid Aziz
2022-06-30 21:53   ` Darrick J. Wong
2022-07-01 16:05     ` Khalid Aziz
2022-06-30 22:57   ` Al Viro
2022-07-01 16:08     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 2/9] mm/mshare: pre-populate msharefs with information file Khalid Aziz
2022-06-30 21:37   ` Darrick J. Wong
2022-06-30 22:54     ` Khalid Aziz
2022-06-30 23:01   ` Al Viro
2022-07-01 16:11     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 3/9] mm/mshare: make msharefs writable and support directories Khalid Aziz
2022-06-30 21:34   ` Darrick J. Wong
2022-06-30 22:49     ` Khalid Aziz
2022-06-30 23:09   ` Al Viro
2022-07-02  0:22     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 4/9] mm/mshare: Add a read operation for msharefs files Khalid Aziz
2022-06-30 21:27   ` Darrick J. Wong
2022-06-30 22:27     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 5/9] mm/mshare: Add vm flag for shared PTE Khalid Aziz
2022-06-30 14:59   ` Mark Hemment
2022-06-30 15:46     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 6/9] mm/mshare: Add mmap operation Khalid Aziz
2022-06-30 21:44   ` Darrick J. Wong
2022-06-30 23:30     ` Khalid Aziz [this message]
2022-06-29 22:53 ` [PATCH v2 7/9] mm/mshare: Add unlink and munmap support Khalid Aziz
2022-06-30 21:50   ` Darrick J. Wong
2022-07-01 15:58     ` Khalid Aziz
2022-06-29 22:53 ` [PATCH v2 8/9] mm/mshare: Add basic page table sharing support Khalid Aziz
2022-07-07  9:13   ` Xin Hao
2022-07-07 15:33     ` Khalid Aziz
2022-06-29 22:54 ` [PATCH v2 9/9] mm/mshare: Enable mshare region mapping across processes Khalid Aziz
2022-06-30 11:57 ` [PATCH v2 0/9] Add support for shared PTEs " Mark Hemment
2022-06-30 15:39   ` Khalid Aziz
2022-07-02  4:24 ` Andrew Morton
2022-07-06 19:26   ` Khalid Aziz
2022-07-08 11:47   ` David Hildenbrand
2022-07-08 19:36     ` Khalid Aziz
2022-07-13 14:00       ` David Hildenbrand
2022-07-13 17:58         ` Mike Kravetz
2022-07-13 18:03           ` David Hildenbrand
2022-07-14 22:02         ` Khalid Aziz
2022-07-18 12:59           ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=75aaa2fc-f044-2096-bf6b-740b0075467e@oracle.com \
    --to=khalid.aziz@oracle.com \
    --cc=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=arnd@arndb.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=djwong@kernel.org \
    --cc=ebiederm@xmission.com \
    --cc=hagen@jauu.net \
    --cc=jack@suse.cz \
    --cc=keescook@chromium.org \
    --cc=kirill@shutemov.name \
    --cc=kucharsk@gmail.com \
    --cc=linkinjeon@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longpeng2@huawei.com \
    --cc=luto@kernel.org \
    --cc=markhemm@googlemail.com \
    --cc=pcc@google.com \
    --cc=rppt@kernel.org \
    --cc=sieberf@amazon.com \
    --cc=sjpark@amazon.de \
    --cc=surenb@google.com \
    --cc=tst@schoebel-theuer.de \
    --cc=willy@infradead.org \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).