* [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
@ 2015-04-16 7:23 Michael Tirado
2015-04-16 8:14 ` Konstantin Khlebnikov
0 siblings, 1 reply; 9+ messages in thread
From: Michael Tirado @ 2015-04-16 7:23 UTC (permalink / raw)
To: hughd; +Cc: linux-mm
Hi everyone, I have 2 questions (see comments marked with "Question:")
that I am hoping to get some input on. Any feedback in general you can offer
is greatly appreciated. Most importantly, I would like to be sure that this
is a valid way to implement such a seal. This is my first kernel modification
and I haven't been following the mailing list for very long (for the record
in case there is a dumb mistake in here) I don't know any kernel devs and
figured this would be the most appropriate place to find some useful feedback.
This seal is similar to F_SEAL_WRITE, but will allow the task that created the
memfd to continue writing and retain a single shared writable mapping. Needed for
one-way communication between processes, authenticated at the task level.
Currently the only way to accomplish this is by constantly creating, filling,
sealing write, then sending memfd. Also, a different name suggestion is welcome.
Signed-off-by: Michael R. Tirado <mtirado418@gmail.com>
---
include/linux/shmem_fs.h | 1 +
include/uapi/linux/fcntl.h | 1 +
kernel/fork.c | 1 +
mm/shmem.c | 77 +++++++++++++++++++--
tools/testing/selftests/memfd/memfd_test.c | 107 +++++++++++++++++++++++++++++
5 files changed, 182 insertions(+), 5 deletions(-)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 50777b5..ee25ab3 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -12,6 +12,7 @@
struct shmem_inode_info {
spinlock_t lock;
+ void *creator; /* for authentication only */
unsigned int seals; /* shmem seals */
unsigned long flags;
unsigned long alloced; /* data pages alloced to file */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index beed138..f339f22 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -40,6 +40,7 @@
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#define F_SEAL_WRITE_NONCREATOR 0x0010 /* prevent writes if not creator task */
/* (1U << 31) is reserved for signed error codes */
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139..f1a35d0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -434,6 +434,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
i_mmap_lock_write(mapping);
+ /*Question: should this be atomic_inc_unless_negative, or is this negligible since it should never be reached?*/
if (tmp->vm_flags & VM_SHARED)
atomic_inc(&mapping->i_mmap_writable);
flush_dcache_mmap_lock(mapping);
diff --git a/mm/shmem.c b/mm/shmem.c
index cf2d0ca..1e35bc2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1481,9 +1481,12 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
/* i_mutex is held by caller */
- if (unlikely(info->seals)) {
- if (info->seals & F_SEAL_WRITE)
+ if (info->seals) {
+ if (info->seals & F_SEAL_WRITE_NONCREATOR && info->creator == current)
+ goto skip_write_seal;
+ if (info->seals & (F_SEAL_WRITE | F_SEAL_WRITE_NONCREATOR))
return -EPERM;
+skip_write_seal:
if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
return -EPERM;
}
@@ -1938,10 +1941,52 @@ continue_resched:
return error;
}
+/* returns 0 if ok, error if seal cannot be applied */
+static int shmem_seal_noncreator(struct file *file, unsigned int seals,
+ struct shmem_inode_info *info)
+{
+ struct vm_area_struct *vma = NULL;
+ struct vm_area_struct *curvma;
+ int c = 0;
+
+ if (seals & F_SEAL_WRITE || info->seals & F_SEAL_WRITE)
+ return -EPERM; /* these two seals cannot coexist */
+
+ if (atomic_read(&file->f_mapping->i_mmap_writable) == 0
+ || info->seals & F_SEAL_WRITE_NONCREATOR)
+ return 0;
+
+ if (atomic_read(&file->f_mapping->i_mmap_writable) > 1
+ || current != info->creator)
+ return -EPERM;
+
+ /*
+ * search current task vma's for the file
+ * ensure that only one writable shared mapping exists
+ */
+ for (curvma = current->mm->mmap; curvma; curvma = curvma->vm_next) {
+ if (curvma->vm_file == file) {
+ if (curvma->vm_flags & (VM_WRITE | VM_SHARED)) {
+ if (++c > 1)
+ return -EPERM;
+ vma = curvma;
+ }
+ }
+ }
+ if (vma == NULL)
+ return -EPERM;
+
+ vma->vm_flags |= VM_DONTCOPY | VM_DENYWRITE;
+ mapping_unmap_writable(file->f_mapping);
+ return mapping_deny_writable(file->f_mapping);
+}
+
+
#define F_ALL_SEALS (F_SEAL_SEAL | \
F_SEAL_SHRINK | \
F_SEAL_GROW | \
- F_SEAL_WRITE)
+ F_SEAL_WRITE | \
+ F_SEAL_WRITE_NONCREATOR)
int shmem_add_seals(struct file *file, unsigned int seals)
{
@@ -1965,6 +2010,9 @@ int shmem_add_seals(struct file *file, unsigned int seals)
* SEAL_SHRINK: Prevent the file from shrinking
* SEAL_GROW: Prevent the file from growing
* SEAL_WRITE: Prevent write access to the file
+ * SEAL_WRITE_NONCREATOR: same effect as SEAL_WRITE, except the task
+ * that created the file is allowed to write, and
+ * retain a single writable shared mapping.
*
* As we don't require any trust relationship between two parties, we
* must prevent seals from being removed. Therefore, sealing a file
@@ -1993,7 +2041,16 @@ int shmem_add_seals(struct file *file, unsigned int seals)
goto unlock;
}
+ if (seals & F_SEAL_WRITE_NONCREATOR) {
+ error = shmem_seal_noncreator(file, seals, info);
+ if (error)
+ goto unlock;
+ }
if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
+ if (info->seals & F_SEAL_WRITE_NONCREATOR) {
+ error = -EPERM;
+ goto unlock;
+ }
error = mapping_deny_writable(file->f_mapping);
if (error)
goto unlock;
@@ -2068,11 +2125,19 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
/* protected by i_mutex */
+ if (info->seals & F_SEAL_WRITE_NONCREATOR) {
+ if(current == info->creator)
+ goto skip_write_seal;
+ else {
+ error = -EPERM;
+ goto out;
+ }
+ }
if (info->seals & F_SEAL_WRITE) {
error = -EPERM;
goto out;
}
-
+skip_write_seal:
shmem_falloc.waitq = &shmem_falloc_waitq;
shmem_falloc.start = unmap_start >> PAGE_SHIFT;
shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -2960,8 +3025,10 @@ SYSCALL_DEFINE2(memfd_create,
info = SHMEM_I(file_inode(file));
file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
file->f_flags |= O_RDWR | O_LARGEFILE;
- if (flags & MFD_ALLOW_SEALING)
+ if (flags & MFD_ALLOW_SEALING) {
info->seals &= ~F_SEAL_SEAL;
+ info->creator = current;
+ }/* Question: do we not want a clear info->seals? why the &= ? */
fd_install(fd, file);
kfree(name);
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 0b9eafb..bc1f829 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -321,6 +321,18 @@ static void mfd_assert_write(int fd)
}
}
+static void mfd_assert_write_nommap(int fd)
+{
+ ssize_t l;
+
+ /* verify write() succeeds */
+ l = write(fd, "\0\0\0\0", 4);
+ if (l != 4) {
+ printf("write() failed: %m\n");
+ abort();
+ }
+}
+
static void mfd_fail_write(int fd)
{
ssize_t l;
@@ -652,6 +664,99 @@ static void test_seal_write(void)
close(fd);
}
+
+/*
+ * Test SEAL_WRITE_NONCREATOR
+ * Test whether SEAL_WRITE_NONCREATOR prevents modifications for all processes
+ * except for the one that created the memfd, and also closes mapping on fork.
+ */
+static void test_seal_write_noncreator()
+{
+ int fd;
+ void *p, *p2, *privmap, *privmap2;
+ pid_t pid;
+ int status;
+
+ fd = mfd_assert_new("kern_memfd_seal_write_noncreator",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* create 2 shared|writes, and one private|read */
+ mfd_assert_has_seals(fd, 0);
+ p = mfd_assert_mmap_shared(fd);
+ p2 = mfd_assert_mmap_shared(fd);
+ privmap = mfd_assert_mmap_private(fd);
+
+ /* verify that seal fails if multiple shared write mappings present*/
+ mfd_fail_add_seals(fd, F_SEAL_WRITE_NONCREATOR);
+ munmap(p2, MFD_DEF_SIZE); /*unmap so theres only 1 shared|write*/
+
+ /* F_SEAL_WRITE_NONCREATOR and F_SEAL_WRITE cannot coexist */
+ mfd_assert_add_seals(fd, F_SEAL_WRITE_NONCREATOR);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE_NONCREATOR);
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+
+ /* private mappings with read|write end up having vma with
+ * VM_SHARED set, which this seal checks and will allow only one
+ * to exist. If more than one VM_SHARED exists, the seal fails.
+ * so any private mappings with PROT_WRITE need to be created after
+ * F_SEAL_WRITE_NONCREATOR has been applied.
+ */
+ privmap2 = mmap(NULL, MFD_DEF_SIZE,
+ PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (privmap2 == MAP_FAILED)
+ abort();
+
+ /* verify that no further shared|write mappings can be made. */
+ p2 = mmap(NULL, MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd, 0);
+ if (p2 != MAP_FAILED)
+ abort();
+
+ mfd_assert_write_nommap(fd);
+ mfd_assert_read(fd);
+ mfd_assert_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_assert_grow_write(fd);
+ memset(p, 'A', MFD_DEF_SIZE);
+ memset(privmap2, 'B', MFD_DEF_SIZE);
+
+ /* check authentication */
+ pid = fork();
+ if (pid == 0) /*this new process is not creator, writes should fail*/
+ {
+ mfd_fail_write(fd);
+ mfd_fail_grow_write(fd);
+ mfd_assert_read(fd);
+ if (*(char *)privmap != 'A' || *(char *)privmap2 != 'B')
+ exit(-1); /* just double checking */
+ memset(privmap2, 'Y', MFD_DEF_SIZE);
+ printf("|----: expecting segfault in forked process...\n");
+ memset(p, 'X', MFD_DEF_SIZE);
+ printf("|----: did not crash :(\n");
+ close(fd);
+ exit(-1);
+ }
+
+ /* abort if other process did not crash */
+ pid = waitpid(pid, &status, 0);
+ if (WIFEXITED(status))
+ abort();
+
+ /*tinfoil level error checking */
+ if (*(char *)privmap != 'A'
+ || *(char *)privmap2 != 'B'
+ || *(char *)p != 'A')
+ abort();
+
+ munmap(p, MFD_DEF_SIZE);
+ munmap(privmap, MFD_DEF_SIZE);
+ munmap(privmap2, MFD_DEF_SIZE);
+ close(fd);
+}
+
/*
* Test SEAL_SHRINK
* Test whether SEAL_SHRINK actually prevents shrinking
@@ -882,6 +987,8 @@ int main(int argc, char **argv)
test_seal_grow();
printf("memfd: SEAL-RESIZE\n");
test_seal_resize();
+ printf("memfd: SEAL-WRITE-NONCREATOR\n");
+ test_seal_write_noncreator();
printf("memfd: SHARE-DUP\n");
test_share_dup();
--
1.8.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
2015-04-16 7:23 [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR Michael Tirado
@ 2015-04-16 8:14 ` Konstantin Khlebnikov
2015-04-16 12:01 ` David Herrmann
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Konstantin Khlebnikov @ 2015-04-16 8:14 UTC (permalink / raw)
To: Michael Tirado; +Cc: Hugh Dickins, linux-mm@kvack.org
On Thu, Apr 16, 2015 at 10:23 AM, Michael Tirado <mtirado418@gmail.com> wrote:
> Hi everyone, I have 2 questions (see comments marked with "Question:")
> that I am hoping to get some input on. Any feedback in general you can offer
> is greatly appreciated. Most importantly, I would like to be sure that this
> is a valid way to implement such a seal. This is my first kernel modification
> and I haven't been following the mailing list for very long (for the record
> in case there is a dumb mistake in here) I don't know any kernel devs and
> figured this would be the most appropriate place to find some useful feedback.
>
> This seal is similar to F_SEAL_WRITE, but will allow the task that created the
> memfd to continue writing and retain a single shared writable mapping. Needed for
> one-way communication between processes, authenticated at the task level.
> Currently the only way to accomplish this is by constantly creating, filling,
> sealing write, then sending memfd. Also, a different name suggestion is welcome.
I guess that was in original design but was dropped for some reason.
Probably that approach couldn't be implemented without flaws or overhead.
Keeping pointer to priviledged task is a bad idea.
There is no easy way to drop it when task exits and this doesn't work
for threads.
I think it's better to keep pointer to priveledged struct file and
drop it in method
f_op->release() when task closes fd or exits. Server task could obtain second
non-priveledged fd and struct file for that inode via
open(/proc/../fd/), dup3(),
openat() or something else and send it to read-only users.
>
> Signed-off-by: Michael R. Tirado <mtirado418@gmail.com>
> ---
> include/linux/shmem_fs.h | 1 +
> include/uapi/linux/fcntl.h | 1 +
> kernel/fork.c | 1 +
> mm/shmem.c | 77 +++++++++++++++++++--
> tools/testing/selftests/memfd/memfd_test.c | 107 +++++++++++++++++++++++++++++
> 5 files changed, 182 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 50777b5..ee25ab3 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -12,6 +12,7 @@
>
> struct shmem_inode_info {
> spinlock_t lock;
> + void *creator; /* for authentication only */
> unsigned int seals; /* shmem seals */
> unsigned long flags;
> unsigned long alloced; /* data pages alloced to file */
> diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
> index beed138..f339f22 100644
> --- a/include/uapi/linux/fcntl.h
> +++ b/include/uapi/linux/fcntl.h
> @@ -40,6 +40,7 @@
> #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
> #define F_SEAL_GROW 0x0004 /* prevent file from growing */
> #define F_SEAL_WRITE 0x0008 /* prevent writes */
> +#define F_SEAL_WRITE_NONCREATOR 0x0010 /* prevent writes if not creator task */
> /* (1U << 31) is reserved for signed error codes */
>
> /*
> diff --git a/kernel/fork.c b/kernel/fork.c
> index cf65139..f1a35d0 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -434,6 +434,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
> if (tmp->vm_flags & VM_DENYWRITE)
> atomic_dec(&inode->i_writecount);
> i_mmap_lock_write(mapping);
> + /*Question: should this be atomic_inc_unless_negative, or is this negligible since it should never be reached?*/
> if (tmp->vm_flags & VM_SHARED)
> atomic_inc(&mapping->i_mmap_writable);
> flush_dcache_mmap_lock(mapping);
> diff --git a/mm/shmem.c b/mm/shmem.c
> index cf2d0ca..1e35bc2 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1481,9 +1481,12 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
> pgoff_t index = pos >> PAGE_CACHE_SHIFT;
>
> /* i_mutex is held by caller */
> - if (unlikely(info->seals)) {
> - if (info->seals & F_SEAL_WRITE)
> + if (info->seals) {
> + if (info->seals & F_SEAL_WRITE_NONCREATOR && info->creator == current)
> + goto skip_write_seal;
> + if (info->seals & (F_SEAL_WRITE | F_SEAL_WRITE_NONCREATOR))
> return -EPERM;
> +skip_write_seal:
> if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
> return -EPERM;
> }
> @@ -1938,10 +1941,52 @@ continue_resched:
> return error;
> }
>
> +/* returns 0 if ok, error if seal cannot be applied */
> +static int shmem_seal_noncreator(struct file *file, unsigned int seals,
> + struct shmem_inode_info *info)
> +{
> + struct vm_area_struct *vma = NULL;
> + struct vm_area_struct *curvma;
> + int c = 0;
> +
> + if (seals & F_SEAL_WRITE || info->seals & F_SEAL_WRITE)
> + return -EPERM; /* these two seals cannot coexist */
> +
> + if (atomic_read(&file->f_mapping->i_mmap_writable) == 0
> + || info->seals & F_SEAL_WRITE_NONCREATOR)
> + return 0;
> +
> + if (atomic_read(&file->f_mapping->i_mmap_writable) > 1
> + || current != info->creator)
> + return -EPERM;
> +
> + /*
> + * search current task vma's for the file
> + * ensure that only one writable shared mapping exists
> + */
> + for (curvma = current->mm->mmap; curvma; curvma = curvma->vm_next) {
> + if (curvma->vm_file == file) {
> + if (curvma->vm_flags & (VM_WRITE | VM_SHARED)) {
> + if (++c > 1)
> + return -EPERM;
> + vma = curvma;
> + }
> + }
> + }
> + if (vma == NULL)
> + return -EPERM;
> +
> + vma->vm_flags |= VM_DONTCOPY | VM_DENYWRITE;
> + mapping_unmap_writable(file->f_mapping);
> + return mapping_deny_writable(file->f_mapping);
> +}
> +
> +
> #define F_ALL_SEALS (F_SEAL_SEAL | \
> F_SEAL_SHRINK | \
> F_SEAL_GROW | \
> - F_SEAL_WRITE)
> + F_SEAL_WRITE | \
> + F_SEAL_WRITE_NONCREATOR)
>
> int shmem_add_seals(struct file *file, unsigned int seals)
> {
> @@ -1965,6 +2010,9 @@ int shmem_add_seals(struct file *file, unsigned int seals)
> * SEAL_SHRINK: Prevent the file from shrinking
> * SEAL_GROW: Prevent the file from growing
> * SEAL_WRITE: Prevent write access to the file
> + * SEAL_WRITE_NONCREATOR: same effect as SEAL_WRITE, except the task
> + * that created the file is allowed to write, and
> + * retain a single writable shared mapping.
> *
> * As we don't require any trust relationship between two parties, we
> * must prevent seals from being removed. Therefore, sealing a file
> @@ -1993,7 +2041,16 @@ int shmem_add_seals(struct file *file, unsigned int seals)
> goto unlock;
> }
>
> + if (seals & F_SEAL_WRITE_NONCREATOR) {
> + error = shmem_seal_noncreator(file, seals, info);
> + if (error)
> + goto unlock;
> + }
> if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
> + if (info->seals & F_SEAL_WRITE_NONCREATOR) {
> + error = -EPERM;
> + goto unlock;
> + }
> error = mapping_deny_writable(file->f_mapping);
> if (error)
> goto unlock;
> @@ -2068,11 +2125,19 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
> DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
>
> /* protected by i_mutex */
> + if (info->seals & F_SEAL_WRITE_NONCREATOR) {
> + if(current == info->creator)
> + goto skip_write_seal;
> + else {
> + error = -EPERM;
> + goto out;
> + }
> + }
> if (info->seals & F_SEAL_WRITE) {
> error = -EPERM;
> goto out;
> }
> -
> +skip_write_seal:
> shmem_falloc.waitq = &shmem_falloc_waitq;
> shmem_falloc.start = unmap_start >> PAGE_SHIFT;
> shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
> @@ -2960,8 +3025,10 @@ SYSCALL_DEFINE2(memfd_create,
> info = SHMEM_I(file_inode(file));
> file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
> file->f_flags |= O_RDWR | O_LARGEFILE;
> - if (flags & MFD_ALLOW_SEALING)
> + if (flags & MFD_ALLOW_SEALING) {
> info->seals &= ~F_SEAL_SEAL;
> + info->creator = current;
> + }/* Question: do we not want a clear info->seals? why the &= ? */
>
> fd_install(fd, file);
> kfree(name);
> diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
> index 0b9eafb..bc1f829 100644
> --- a/tools/testing/selftests/memfd/memfd_test.c
> +++ b/tools/testing/selftests/memfd/memfd_test.c
> @@ -321,6 +321,18 @@ static void mfd_assert_write(int fd)
> }
> }
>
> +static void mfd_assert_write_nommap(int fd)
> +{
> + ssize_t l;
> +
> + /* verify write() succeeds */
> + l = write(fd, "\0\0\0\0", 4);
> + if (l != 4) {
> + printf("write() failed: %m\n");
> + abort();
> + }
> +}
> +
> static void mfd_fail_write(int fd)
> {
> ssize_t l;
> @@ -652,6 +664,99 @@ static void test_seal_write(void)
> close(fd);
> }
>
> +
> +/*
> + * Test SEAL_WRITE_NONCREATOR
> + * Test whether SEAL_WRITE_NONCREATOR prevents modifications for all processes
> + * except for the one that created the memfd, and also closes mapping on fork.
> + */
> +static void test_seal_write_noncreator()
> +{
> + int fd;
> + void *p, *p2, *privmap, *privmap2;
> + pid_t pid;
> + int status;
> +
> + fd = mfd_assert_new("kern_memfd_seal_write_noncreator",
> + MFD_DEF_SIZE,
> + MFD_CLOEXEC | MFD_ALLOW_SEALING);
> +
> + /* create 2 shared|writes, and one private|read */
> + mfd_assert_has_seals(fd, 0);
> + p = mfd_assert_mmap_shared(fd);
> + p2 = mfd_assert_mmap_shared(fd);
> + privmap = mfd_assert_mmap_private(fd);
> +
> + /* verify that seal fails if multiple shared write mappings present*/
> + mfd_fail_add_seals(fd, F_SEAL_WRITE_NONCREATOR);
> + munmap(p2, MFD_DEF_SIZE); /*unmap so theres only 1 shared|write*/
> +
> + /* F_SEAL_WRITE_NONCREATOR and F_SEAL_WRITE cannot coexist */
> + mfd_assert_add_seals(fd, F_SEAL_WRITE_NONCREATOR);
> + mfd_assert_has_seals(fd, F_SEAL_WRITE_NONCREATOR);
> + mfd_fail_add_seals(fd, F_SEAL_WRITE);
> +
> + /* private mappings with read|write end up having vma with
> + * VM_SHARED set, which this seal checks and will allow only one
> + * to exist. If more than one VM_SHARED exists, the seal fails.
> + * so any private mappings with PROT_WRITE need to be created after
> + * F_SEAL_WRITE_NONCREATOR has been applied.
> + */
> + privmap2 = mmap(NULL, MFD_DEF_SIZE,
> + PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
> + if (privmap2 == MAP_FAILED)
> + abort();
> +
> + /* verify that no further shared|write mappings can be made. */
> + p2 = mmap(NULL, MFD_DEF_SIZE,
> + PROT_READ | PROT_WRITE,
> + MAP_SHARED,
> + fd, 0);
> + if (p2 != MAP_FAILED)
> + abort();
> +
> + mfd_assert_write_nommap(fd);
> + mfd_assert_read(fd);
> + mfd_assert_shrink(fd);
> + mfd_assert_grow(fd);
> + mfd_assert_grow_write(fd);
> + memset(p, 'A', MFD_DEF_SIZE);
> + memset(privmap2, 'B', MFD_DEF_SIZE);
> +
> + /* check authentication */
> + pid = fork();
> + if (pid == 0) /*this new process is not creator, writes should fail*/
> + {
> + mfd_fail_write(fd);
> + mfd_fail_grow_write(fd);
> + mfd_assert_read(fd);
> + if (*(char *)privmap != 'A' || *(char *)privmap2 != 'B')
> + exit(-1); /* just double checking */
> + memset(privmap2, 'Y', MFD_DEF_SIZE);
> + printf("|----: expecting segfault in forked process...\n");
> + memset(p, 'X', MFD_DEF_SIZE);
> + printf("|----: did not crash :(\n");
> + close(fd);
> + exit(-1);
> + }
> +
> + /* abort if other process did not crash */
> + pid = waitpid(pid, &status, 0);
> + if (WIFEXITED(status))
> + abort();
> +
> + /*tinfoil level error checking */
> + if (*(char *)privmap != 'A'
> + || *(char *)privmap2 != 'B'
> + || *(char *)p != 'A')
> + abort();
> +
> + munmap(p, MFD_DEF_SIZE);
> + munmap(privmap, MFD_DEF_SIZE);
> + munmap(privmap2, MFD_DEF_SIZE);
> + close(fd);
> +}
> +
> /*
> * Test SEAL_SHRINK
> * Test whether SEAL_SHRINK actually prevents shrinking
> @@ -882,6 +987,8 @@ int main(int argc, char **argv)
> test_seal_grow();
> printf("memfd: SEAL-RESIZE\n");
> test_seal_resize();
> + printf("memfd: SEAL-WRITE-NONCREATOR\n");
> + test_seal_write_noncreator();
>
> printf("memfd: SHARE-DUP\n");
> test_share_dup();
> --
> 1.8.4
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
2015-04-16 8:14 ` Konstantin Khlebnikov
@ 2015-04-16 12:01 ` David Herrmann
2015-04-17 4:28 ` Michael Tirado
2015-04-17 4:18 ` Michael Tirado
2015-04-28 13:28 ` [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_PEER Michael Tirado
2 siblings, 1 reply; 9+ messages in thread
From: David Herrmann @ 2015-04-16 12:01 UTC (permalink / raw)
To: Konstantin Khlebnikov; +Cc: Michael Tirado, Hugh Dickins, linux-mm@kvack.org
Hi
On Thu, Apr 16, 2015 at 10:14 AM, Konstantin Khlebnikov
<koct9i@gmail.com> wrote:
> On Thu, Apr 16, 2015 at 10:23 AM, Michael Tirado <mtirado418@gmail.com> wrote:
>> Hi everyone, I have 2 questions (see comments marked with "Question:")
>> that I am hoping to get some input on. Any feedback in general you can offer
>> is greatly appreciated. Most importantly, I would like to be sure that this
>> is a valid way to implement such a seal. This is my first kernel modification
>> and I haven't been following the mailing list for very long (for the record
>> in case there is a dumb mistake in here) I don't know any kernel devs and
>> figured this would be the most appropriate place to find some useful feedback.
>>
>> This seal is similar to F_SEAL_WRITE, but will allow the task that created the
>> memfd to continue writing and retain a single shared writable mapping. Needed for
>> one-way communication between processes, authenticated at the task level.
>> Currently the only way to accomplish this is by constantly creating, filling,
>> sealing write, then sending memfd. Also, a different name suggestion is welcome.
>
> I guess that was in original design but was dropped for some reason.
No. This is not what sealing is about. Seals are a property of an
object, they're unrelated to the process accessing it. Sealing is not
an access-control method, but describes the state and capabilities of
a file.
The same functionality of F_SEAL_WRITE_NONCREATOR can be achieved by
opening /proc/self/fd/<num> with O_RDONLY. Just pass that read-only FD
to your peers but retain the writable one. But note that you must
verify your peers do not have the same uid as you do, otherwise they
can just gain a writable descriptor by opening /proc/self/fd/<num>
themselves.
Thanks
David
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
2015-04-16 12:01 ` David Herrmann
@ 2015-04-17 4:28 ` Michael Tirado
2015-04-17 10:48 ` David Herrmann
0 siblings, 1 reply; 9+ messages in thread
From: Michael Tirado @ 2015-04-17 4:28 UTC (permalink / raw)
To: David Herrmann; +Cc: linux-mm
David,
Thank you for the reply and your work on memfd.
It's been a fun learning experience working with this code.
On Thu, 16 Apr 2015 14:01:07 +0200
David Herrmann <dh.herrmann@gmail.com> wrote:
> No. This is not what sealing is about. Seals are a property of an
> object, they're unrelated to the process accessing it. Sealing is not
> an access-control method, but describes the state and capabilities of
> a file.
The comments on sealing at the top of shmem_add_seals lead me to believe
that seals were in place specifically for access control purposes.
> The same functionality of F_SEAL_WRITE_NONCREATOR can be achieved by
> opening /proc/self/fd/<num> with O_RDONLY. Just pass that read-only FD
> to your peers but retain the writable one. But note that you must
> verify your peers do not have the same uid as you do, otherwise they
> can just gain a writable descriptor by opening /proc/self/fd/<num>
> themselves.
>
> Thanks
> David
My peers may be any uid, in the same or different pid namespace.
I would really like to not have to maintain an AF_UNIX connection to receive
memfd's with a write seal. It does make a lot of sense for multicasting,
but I feel memfd would be more versitile if there was a concept of ownership,
or directionality, so a user could shed the socket after SCM_RIGHTS message or
even fork without a socket at all. I am more than willing to put in the work
if someone can offer advice on how to better achieve this type of shared
memory. Maybe it's already out there and I just didn't know where to look?
-Michael
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
2015-04-17 4:28 ` Michael Tirado
@ 2015-04-17 10:48 ` David Herrmann
2015-04-17 22:45 ` Michael Tirado
0 siblings, 1 reply; 9+ messages in thread
From: David Herrmann @ 2015-04-17 10:48 UTC (permalink / raw)
To: Michael Tirado; +Cc: linux-mm
Hi
On Fri, Apr 17, 2015 at 6:28 AM, Michael Tirado <mtirado418@gmail.com> wrote:
> On Thu, 16 Apr 2015 14:01:07 +0200
> David Herrmann <dh.herrmann@gmail.com> wrote:
>> The same functionality of F_SEAL_WRITE_NONCREATOR can be achieved by
>> opening /proc/self/fd/<num> with O_RDONLY. Just pass that read-only FD
>> to your peers but retain the writable one. But note that you must
>> verify your peers do not have the same uid as you do, otherwise they
>> can just gain a writable descriptor by opening /proc/self/fd/<num>
>> themselves.
>
> My peers may be any uid,
Where's the problem? Just pass the read-only file-descriptor to your
peers and make sure the access-mode of the memfd is 0600. No other
user will be able to gain a writable file-descriptor, but you.
Thanks
David
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
2015-04-17 10:48 ` David Herrmann
@ 2015-04-17 22:45 ` Michael Tirado
2015-04-18 12:13 ` David Herrmann
0 siblings, 1 reply; 9+ messages in thread
From: Michael Tirado @ 2015-04-17 22:45 UTC (permalink / raw)
To: David Herrmann; +Cc: linux-mm
On Fri, 17 Apr 2015 12:48:44 +0200
David Herrmann <dh.herrmann@gmail.com> wrote:
> Where's the problem? Just pass the read-only file-descriptor to your
> peers and make sure the access-mode of the memfd is 0600. No other
> user will be able to gain a writable file-descriptor, but you.
I see what you mean now, This does make sense. I started writing a test
and it seems like the write on a duplicated O_RDONLY fd does not fail
properly, and is causing a general protection error. Here is the output
and test code:
memfd: a dup test
expected EPERM on write(), but got 4: Operation not permitted
back in main thread
[ 8.563759] traps: memfd_test[548] general protection ip:b75b638c sp:bffdbbe0 error:0 in libc-2.20.so[b7589000+1ae000]
bash-4.3#
note that the return value 4 indicates successful write.
static void test_dup()
{
pid_t pid;
int status;
int fd_seal;
int fd_rdonly = 99;
fd_seal = mfd_assert_new("kern_memfd_seal_write",
MFD_DEF_SIZE,
MFD_CLOEXEC | MFD_ALLOW_SEALING);
fd_rdonly = dup3(fd_seal, fd_rdonly, O_RDONLY);
mfd_assert_add_seals(fd_seal, F_SEAL_SEAL);
if (fd_rdonly != 99) {
printf("dup3 error: %m\n");
abort();
}
pid = fork();
if (pid == 0)
{
int fd_peer = 97;
/*mfd_fail_write(fd_seal);*/
/* this does not fail properly? */
mfd_fail_write(fd_rdonly);
/* this will fail with, invalid argument */
/*fd_peer = dup3(fd_rdonly, fd_peer, O_RDWR);
if (fd_peer == -1) {
printf("dup3 error: %m\n");
abort();
}
mfd_fail_write(fd_peer);*/
printf("exiting normally\n");
exit(0);
}
usleep(100000);
printf("back in main thread\n");
mfd_assert_write(fd_seal);
/*mfd_fail_write(fd_rdonly);*/
usleep(1000000);
/* this seems to trigger general protection crash */
pid = waitpid(pid, &status, 0);
if (!WIFEXITED(status))
abort();
}
I don't have time right now to dig deep into this, but will look into it more
in the next few days, and report back.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
2015-04-17 22:45 ` Michael Tirado
@ 2015-04-18 12:13 ` David Herrmann
0 siblings, 0 replies; 9+ messages in thread
From: David Herrmann @ 2015-04-18 12:13 UTC (permalink / raw)
To: Michael Tirado; +Cc: linux-mm
Hi
On Sat, Apr 18, 2015 at 12:45 AM, Michael Tirado <mtirado418@gmail.com> wrote:
> On Fri, 17 Apr 2015 12:48:44 +0200
> David Herrmann <dh.herrmann@gmail.com> wrote:
>
>> Where's the problem? Just pass the read-only file-descriptor to your
>> peers and make sure the access-mode of the memfd is 0600. No other
>> user will be able to gain a writable file-descriptor, but you.
>
> I see what you mean now, This does make sense. I started writing a test
> and it seems like the write on a duplicated O_RDONLY fd does not fail
> properly, and is causing a general protection error. Here is the output
> and test code:
>
>
> memfd: a dup test
> expected EPERM on write(), but got 4: Operation not permitted
> back in main thread
> [ 8.563759] traps: memfd_test[548] general protection ip:b75b638c sp:bffdbbe0 error:0 in libc-2.20.so[b7589000+1ae000]
> bash-4.3#
>
> note that the return value 4 indicates successful write.
>
>
>
> static void test_dup()
> {
> pid_t pid;
> int status;
> int fd_seal;
> int fd_rdonly = 99;
>
> fd_seal = mfd_assert_new("kern_memfd_seal_write",
> MFD_DEF_SIZE,
> MFD_CLOEXEC | MFD_ALLOW_SEALING);
>
> fd_rdonly = dup3(fd_seal, fd_rdonly, O_RDONLY);
> mfd_assert_add_seals(fd_seal, F_SEAL_SEAL);
> if (fd_rdonly != 99) {
> printf("dup3 error: %m\n");
> abort();
> }
You cannot use dup3() to change access-flags. See fcntl(2) for F_SETFL
(which is what dup3(2) basically does). You must create that new
file-descriptor via /proc. Instead, please use:
fd_rdonly = memfd_assert_open(fd_seal, O_RDONLY, 0);
Also, there is no reason to pass MFD_ALLOW_SEALING, nor do you need to
set F_SEAL_SEAL.
Thanks
David
>
> pid = fork();
> if (pid == 0)
> {
> int fd_peer = 97;
>
> /*mfd_fail_write(fd_seal);*/
> /* this does not fail properly? */
> mfd_fail_write(fd_rdonly);
>
> /* this will fail with, invalid argument */
> /*fd_peer = dup3(fd_rdonly, fd_peer, O_RDWR);
> if (fd_peer == -1) {
> printf("dup3 error: %m\n");
> abort();
> }
> mfd_fail_write(fd_peer);*/
> printf("exiting normally\n");
> exit(0);
> }
>
> usleep(100000);
> printf("back in main thread\n");
> mfd_assert_write(fd_seal);
> /*mfd_fail_write(fd_rdonly);*/
> usleep(1000000);
>
> /* this seems to trigger general protection crash */
> pid = waitpid(pid, &status, 0);
> if (!WIFEXITED(status))
> abort();
> }
>
>
> I don't have time right now to dig deep into this, but will look into it more
> in the next few days, and report back.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR
2015-04-16 8:14 ` Konstantin Khlebnikov
2015-04-16 12:01 ` David Herrmann
@ 2015-04-17 4:18 ` Michael Tirado
2015-04-28 13:28 ` [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_PEER Michael Tirado
2 siblings, 0 replies; 9+ messages in thread
From: Michael Tirado @ 2015-04-17 4:18 UTC (permalink / raw)
To: Konstantin Khlebnikov; +Cc: linux-mm
On Thu, 16 Apr 2015 11:14:11 +0300
Konstantin Khlebnikov <koct9i@gmail.com> wrote:
> Keeping pointer to priviledged task is a bad idea.
> There is no easy way to drop it when task exits and this doesn't work
> for threads.
> I think it's better to keep pointer to priveledged struct file and
> drop it in method
> f_op->release() when task closes fd or exits. Server task could obtain second
> non-priveledged fd and struct file for that inode via
> open(/proc/../fd/), dup3(),
> openat() or something else and send it to read-only users.
Thank you, I was hoping someone would suggest a different authentication
method, I will look into this idea. What is the thread concern? I have
not run in to any problems yet while testing, but have been more focused
on getting my user space memfd transport daemon up and running before I put
it through the torture test.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_PEER
2015-04-16 8:14 ` Konstantin Khlebnikov
2015-04-16 12:01 ` David Herrmann
2015-04-17 4:18 ` Michael Tirado
@ 2015-04-28 13:28 ` Michael Tirado
2 siblings, 0 replies; 9+ messages in thread
From: Michael Tirado @ 2015-04-28 13:28 UTC (permalink / raw)
To: Konstantin Khlebnikov; +Cc: linux-mm
On Thu, 16 Apr 2015 11:14:11 +0300
Konstantin Khlebnikov <koct9i@gmail.com> wrote:
> Keeping pointer to priviledged task is a bad idea.
> There is no easy way to drop it when task exits and this doesn't work
> for threads.
> I think it's better to keep pointer to priveledged struct file and
> drop it in method
> f_op->release() when task closes fd or exits. Server task could obtain second
> non-priveledged fd and struct file for that inode via
> open(/proc/../fd/), dup3(),
> openat() or something else and send it to read-only users.
Do you mind taking a second look at my changes? I am wondering if you or anyone
else see problems with this version.
I have changed the authentication variable to use files_struct instead of
privileged task, this is set to NULL when the owner closes the file. There
are a few other fixes here too. Overall this seems like a much better patch
than the original, thanks for your valuable input. I will continue trying to
break this thing and hope to re-send it to the main mailing list with the
updated name if it holds up. This is for my own system, but maybe others will
find it useful?
---
include/linux/shmem_fs.h | 1 +
include/uapi/linux/fcntl.h | 1 +
mm/shmem.c | 119 ++++++++++++++++++++++++++++-
tools/testing/selftests/memfd/memfd_test.c | 94 +++++++++++++++++++++++
4 files changed, 212 insertions(+), 3 deletions(-)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 50777b5..a3cffc3 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -12,6 +12,7 @@
struct shmem_inode_info {
spinlock_t lock;
+ void *auth; /* owner's task->files addr */
unsigned int seals; /* shmem seals */
unsigned long flags;
unsigned long alloced; /* data pages alloced to file */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index beed138..7138bb6 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -40,6 +40,7 @@
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#define F_SEAL_WRITE_PEER 0x0010 /*prevent writes from peers */
/* (1U << 31) is reserved for signed error codes */
/*
diff --git a/mm/shmem.c b/mm/shmem.c
index de98137..e3a926f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1481,7 +1481,10 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
/* i_mutex is held by caller */
- if (unlikely(info->seals)) {
+ if (info->seals) {
+ if (info->seals & F_SEAL_WRITE_PEER
+ && info->auth != current->files)
+ return -EPERM;
if (info->seals & F_SEAL_WRITE)
return -EPERM;
if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
@@ -1938,10 +1941,79 @@ continue_resched:
return error;
}
+/*
+ * returns 0 if ok. error if seal cannot be applied .
+ */
+static int shmem_seal_write_peer(struct file *file, unsigned int seals,
+ struct shmem_inode_info *info)
+{
+ struct vm_area_struct *vma;
+ struct vm_area_struct *curvma;
+ int c = 0;
+ int retval;
+
+
+ if (seals & F_SEAL_WRITE || info->seals & F_SEAL_WRITE)
+ return -EPERM;
+
+ if (atomic_read(&file->f_mapping->i_mmap_writable) < 0
+ || atomic_read(&file->f_mapping->i_mmap_writable) > 1
+ || current->files != info->auth)
+ return -EPERM;
+
+ /* lock current->mm */
+ task_lock(current);
+
+ /*
+ * search current task vma's for references to file.
+ * ensure that only one writable shared mapping exists
+ */
+ vma = NULL;
+ for (curvma = current->mm->mmap; curvma; curvma = curvma->vm_next) {
+ if (curvma->vm_file == file
+ && curvma->vm_flags & (VM_WRITE | VM_SHARED)) {
+ if (++c > 1) {
+ retval = -EPERM;
+ goto unlock;
+ }
+ vma = curvma;
+ }
+ }
+
+ if (vma == NULL) {
+ retval = mapping_deny_writable(file->f_mapping);
+ goto unlock;
+ }
+
+ /* 1 shared write mapping remains */
+ mapping_unmap_writable(file->f_mapping);
+ retval = mapping_deny_writable(file->f_mapping);
+ if (retval) {
+ mapping_allow_writable(file->f_mapping);
+ goto unlock;
+ }
+ retval = shmem_wait_for_pins(file->f_mapping);
+ if (retval) {
+ atomic_inc(&file->f_mapping->i_mmap_writable);
+ mapping_allow_writable(file->f_mapping);
+ goto unlock;
+ }
+
+ /* do not dupe */
+ vma->vm_flags |= VM_DONTCOPY;
+ retval = 0;
+
+unlock:
+ task_unlock(current);
+ return retval;
+}
+
+
#define F_ALL_SEALS (F_SEAL_SEAL | \
F_SEAL_SHRINK | \
F_SEAL_GROW | \
- F_SEAL_WRITE)
+ F_SEAL_WRITE | \
+ F_SEAL_WRITE_PEER)
int shmem_add_seals(struct file *file, unsigned int seals)
{
@@ -1965,6 +2037,11 @@ int shmem_add_seals(struct file *file, unsigned int seals)
* SEAL_SHRINK: Prevent the file from shrinking
* SEAL_GROW: Prevent the file from growing
* SEAL_WRITE: Prevent write access to the file
+ * SEAL_WRITE_PEER: Same effect as SEAL_WRITE, except the task
+ * that created the file is allowed to write, and
+ * retain a single writable shared mapping.
+ * authentication is done using the the tasks
+ * files_struct address.
*
* As we don't require any trust relationship between two parties, we
* must prevent seals from being removed. Therefore, sealing a file
@@ -1993,7 +2070,22 @@ int shmem_add_seals(struct file *file, unsigned int seals)
goto unlock;
}
+ if (seals & F_SEAL_WRITE_PEER && !(info->seals & F_SEAL_WRITE_PEER)) {
+ error = shmem_seal_write_peer(file, seals, info);
+ if (error)
+ goto unlock;
+ /*
+ * info->auth may be cleared if fd is re-open(2)'d via /proc
+ * because O_TRUNC causes flush. seal shrink to prevent this.
+ */
+ seals |= F_SEAL_SHRINK;
+ }
+
if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
+ if (info->seals & F_SEAL_WRITE_PEER) {
+ error = -EPERM;
+ goto unlock;
+ }
error = mapping_deny_writable(file->f_mapping);
if (error)
goto unlock;
@@ -2061,6 +2153,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&inode->i_mutex);
+ if (info->seals & F_SEAL_WRITE_PEER
+ && current->files != info->auth) {
+ error = -EPERM;
+ goto out;
+ }
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
struct address_space *mapping = file->f_mapping;
loff_t unmap_start = round_up(offset, PAGE_SIZE);
@@ -2960,8 +3058,10 @@ SYSCALL_DEFINE2(memfd_create,
info = SHMEM_I(file_inode(file));
file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
file->f_flags |= O_RDWR | O_LARGEFILE;
- if (flags & MFD_ALLOW_SEALING)
+ if (flags & MFD_ALLOW_SEALING) {
info->seals &= ~F_SEAL_SEAL;
+ info->auth = current->files;
+ }
fd_install(fd, file);
kfree(name);
@@ -2974,6 +3074,18 @@ err_name:
return error;
}
+static int shmem_flush(struct file *file, fl_owner_t id)
+{
+ struct shmem_inode_info *info;
+ info = SHMEM_I(file_inode(file));
+
+ /* F_SEAL_WRITE_PEER clears auth value if owner closes file */
+ if (info->auth == current->files
+ && atomic_read(&file->f_mapping->i_mmap_writable) <= 0)
+ info->auth = NULL;
+ return 0;
+}
+
#endif /* CONFIG_TMPFS */
static void shmem_put_super(struct super_block *sb)
@@ -3124,6 +3236,7 @@ static const struct file_operations shmem_file_operations = {
.splice_read = shmem_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
+ .flush = shmem_flush,
#endif
};
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 0b9eafb..b4afe2b 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -677,6 +677,98 @@ static void test_seal_shrink(void)
}
/*
+ * Test SEAL_WRITE_PEER
+ * Test whether SEAL_WRITE_PEER prevents modifications for all processes
+ * except for the one that created the memfd.
+ */
+static void test_seal_write_peer()
+{
+ int fd, fd2;
+ void *p, *p2, *privmap, *privmap2;
+ pid_t pid;
+ pid_t parent_pid;
+ int status;
+ char buf[MFD_DEF_SIZE*10];
+
+ parent_pid = getpid();
+ fd = mfd_assert_new("kern_memfd_seal_write_peer",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* create 2 shared|writes, and one private|read */
+ p = mfd_assert_mmap_shared(fd);
+ p2 = mfd_assert_mmap_shared(fd);
+ privmap = mfd_assert_mmap_private(fd);
+ /* verify that seal fails if multiple shared write mappings present */
+ mfd_fail_add_seals(fd, F_SEAL_WRITE_PEER);
+ munmap(p2, MFD_DEF_SIZE); /* unmap so theres only 1 shared|write */
+
+
+ /* private mappings with read|write made before seal is applied will
+ * end up having a MAP_SHARED set. in shmem_seal_write_peer there
+ * were some VM_SHARED mappings in task. the function ensures that
+ * if more than one VM_SHARED exists, the seal fails. so any private
+ * mappings with PROT_WRITE need to be created after F_SEAL_WRITE_PEER
+ * has been set.
+ */
+ privmap2 = mmap(NULL, MFD_DEF_SIZE,
+ PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (privmap2 == MAP_FAILED)
+ abort();
+ mfd_fail_add_seals(fd, F_SEAL_WRITE_PEER);
+ munmap(privmap2, MFD_DEF_SIZE);
+
+
+ /* F_SEAL_WRITE_PEER and F_SEAL_WRITE cannot coexist */
+ mfd_assert_add_seals(fd, F_SEAL_WRITE_PEER);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE_PEER | F_SEAL_SHRINK);
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+
+ /* verify that no further shared|write mappings can be made. */
+ p2 = mmap(NULL, MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd, 0);
+ if (p2 != MAP_FAILED)
+ abort();
+
+ mfd_assert_read(fd);
+ mfd_fail_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_assert_grow_write(fd);
+
+ /* check authentication */
+ pid = fork();
+ if (pid == 0) /*this new process is not creator, writes should fail*/
+ {
+ /* attempt re-open peer fd with read/write */
+ sprintf(buf, "/proc/%d/fd/%d", parent_pid, fd);
+ /*fd = open(buf, O_RDWR | O_CREAT , S_IRUSR | S_IWUSR);*/
+ mfd_fail_shrink(fd);
+ mfd_fail_write(fd);
+ lseek(fd, 0, SEEK_SET);
+ mfd_fail_grow(fd);
+ mfd_fail_grow_write(fd);
+ mfd_assert_read(fd);
+ printf("|----: expecting segfault in forked process...\n");
+ memset(p2, 'Z', MFD_DEF_SIZE);
+ printf("|----: no segfault\n");
+ exit(-1);
+ } else if (pid == -1) {
+ printf("fork(): %m\n");
+ abort();
+ }
+
+ /*mfd_assert_write_nommap(fd);*/
+ /* abort if other process did not crash */
+ pid = waitpid(pid, &status, 0);
+ if (WIFEXITED(status))
+ abort();
+
+ close(fd);
+}
+
+/*
* Test SEAL_GROW
* Test whether SEAL_GROW actually prevents growing
*/
@@ -878,6 +970,8 @@ int main(int argc, char **argv)
test_seal_write();
printf("memfd: SEAL-SHRINK\n");
test_seal_shrink();
+ printf("memfd: SEAL-WRITE-PEER\n");
+ test_seal_write_peer();
printf("memfd: SEAL-GROW\n");
test_seal_grow();
printf("memfd: SEAL-RESIZE\n");
--
1.8.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2015-04-28 13:29 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-04-16 7:23 [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_NONCREATOR Michael Tirado
2015-04-16 8:14 ` Konstantin Khlebnikov
2015-04-16 12:01 ` David Herrmann
2015-04-17 4:28 ` Michael Tirado
2015-04-17 10:48 ` David Herrmann
2015-04-17 22:45 ` Michael Tirado
2015-04-18 12:13 ` David Herrmann
2015-04-17 4:18 ` Michael Tirado
2015-04-28 13:28 ` [PATCH] mm/shmem.c: Add new seal to memfd: F_SEAL_WRITE_PEER Michael Tirado
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).