From: Andrea Arcangeli <aarcange@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, Michael Rapoport <RAPOPORT@il.ibm.com>,
"Dr. David Alan Gilbert"@v2.random, " <dgilbert@redhat.com>,
Mike Kravetz <mike.kravetz@oracle.com>,
Shaohua Li <shli@fb.com>,
Pavel Emelyanov <xemul@parallels.com>"@v2.random
Subject: [PATCH 06/33] userfaultfd: non-cooperative: Add ability to report non-PF events from uffd descriptor
Date: Wed, 2 Nov 2016 20:33:38 +0100 [thread overview]
Message-ID: <1478115245-32090-7-git-send-email-aarcange@redhat.com> (raw)
In-Reply-To: <1478115245-32090-1-git-send-email-aarcange@redhat.com>
From: Pavel Emelyanov <xemul@parallels.com>
The custom events are queued in ctx->event_wqh not to disturb the
fast-path-ed PF queue-wait-wakeup functions.
The events to be generated (other than PF-s) are requested in UFFD_API
ioctl with the uffd_api.features bits. Those, known by the kernel, are
then turned on and reported back to the user-space.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
fs/userfaultfd.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 96 insertions(+), 2 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index b4f790f..76205b3 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -12,6 +12,7 @@
* mm/ksm.c (mm hashing).
*/
+#include <linux/list.h>
#include <linux/hashtable.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -45,12 +46,16 @@ struct userfaultfd_ctx {
wait_queue_head_t fault_wqh;
/* waitqueue head for the pseudo fd to wakeup poll/read */
wait_queue_head_t fd_wqh;
+ /* waitqueue head for events */
+ wait_queue_head_t event_wqh;
/* a refile sequence protected by fault_pending_wqh lock */
struct seqcount refile_seq;
/* pseudo fd refcounting */
atomic_t refcount;
/* userfaultfd syscall flags */
unsigned int flags;
+ /* features requested from the userspace */
+ unsigned int features;
/* state machine */
enum userfaultfd_state state;
/* released */
@@ -135,6 +140,8 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
+ VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
+ VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
mmdrop(ctx->mm);
@@ -423,6 +430,59 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
return ret;
}
+static int __maybe_unused userfaultfd_event_wait_completion(
+ struct userfaultfd_ctx *ctx,
+ struct userfaultfd_wait_queue *ewq)
+{
+ int ret = 0;
+
+ ewq->ctx = ctx;
+ init_waitqueue_entry(&ewq->wq, current);
+
+ spin_lock(&ctx->event_wqh.lock);
+ /*
+ * After the __add_wait_queue the uwq is visible to userland
+ * through poll/read().
+ */
+ __add_wait_queue(&ctx->event_wqh, &ewq->wq);
+ for (;;) {
+ set_current_state(TASK_KILLABLE);
+ if (ewq->msg.event == 0)
+ break;
+ if (ACCESS_ONCE(ctx->released) ||
+ fatal_signal_pending(current)) {
+ ret = -1;
+ __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+ break;
+ }
+
+ spin_unlock(&ctx->event_wqh.lock);
+
+ wake_up_poll(&ctx->fd_wqh, POLLIN);
+ schedule();
+
+ spin_lock(&ctx->event_wqh.lock);
+ }
+ __set_current_state(TASK_RUNNING);
+ spin_unlock(&ctx->event_wqh.lock);
+
+ /*
+ * ctx may go away after this if the userfault pseudo fd is
+ * already released.
+ */
+
+ userfaultfd_ctx_put(ctx);
+ return ret;
+}
+
+static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
+ struct userfaultfd_wait_queue *ewq)
+{
+ ewq->msg.event = 0;
+ wake_up_locked(&ctx->event_wqh);
+ __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+}
+
static int userfaultfd_release(struct inode *inode, struct file *file)
{
struct userfaultfd_ctx *ctx = file->private_data;
@@ -511,6 +571,12 @@ static inline struct userfaultfd_wait_queue *find_userfault(
return find_userfault_in(&ctx->fault_pending_wqh);
}
+static inline struct userfaultfd_wait_queue *find_userfault_evt(
+ struct userfaultfd_ctx *ctx)
+{
+ return find_userfault_in(&ctx->event_wqh);
+}
+
static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
{
struct userfaultfd_ctx *ctx = file->private_data;
@@ -542,6 +608,9 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
smp_mb();
if (waitqueue_active(&ctx->fault_pending_wqh))
ret = POLLIN;
+ else if (waitqueue_active(&ctx->event_wqh))
+ ret = POLLIN;
+
return ret;
default:
WARN_ON_ONCE(1);
@@ -606,6 +675,19 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
break;
}
spin_unlock(&ctx->fault_pending_wqh.lock);
+
+ spin_lock(&ctx->event_wqh.lock);
+ uwq = find_userfault_evt(ctx);
+ if (uwq) {
+ *msg = uwq->msg;
+
+ userfaultfd_event_complete(ctx, uwq);
+ spin_unlock(&ctx->event_wqh.lock);
+ ret = 0;
+ break;
+ }
+ spin_unlock(&ctx->event_wqh.lock);
+
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -1149,6 +1231,14 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
return ret;
}
+static inline unsigned int uffd_ctx_features(__u64 user_features)
+{
+ /*
+ * For the current set of features the bits just coincide
+ */
+ return (unsigned int)user_features;
+}
+
/*
* userland asks for a certain API version and we return which bits
* and ioctl commands are implemented in this kernel for such API
@@ -1167,19 +1257,21 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
ret = -EFAULT;
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
goto out;
- if (uffdio_api.api != UFFD_API || uffdio_api.features) {
+ if (uffdio_api.api != UFFD_API ||
+ (uffdio_api.features & ~UFFD_API_FEATURES)) {
memset(&uffdio_api, 0, sizeof(uffdio_api));
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
goto out;
ret = -EINVAL;
goto out;
}
- uffdio_api.features = UFFD_API_FEATURES;
+ uffdio_api.features &= UFFD_API_FEATURES;
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
goto out;
ctx->state = UFFD_STATE_RUNNING;
+ ctx->features = uffd_ctx_features(uffdio_api.features);
ret = 0;
out:
return ret;
@@ -1266,6 +1358,7 @@ static void init_once_userfaultfd_ctx(void *mem)
init_waitqueue_head(&ctx->fault_pending_wqh);
init_waitqueue_head(&ctx->fault_wqh);
+ init_waitqueue_head(&ctx->event_wqh);
init_waitqueue_head(&ctx->fd_wqh);
seqcount_init(&ctx->refile_seq);
}
@@ -1306,6 +1399,7 @@ static struct file *userfaultfd_file_create(int flags)
atomic_set(&ctx->refcount, 1);
ctx->flags = flags;
+ ctx->features = 0;
ctx->state = UFFD_STATE_WAIT_API;
ctx->released = false;
ctx->mm = current->mm;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2016-11-02 19:40 UTC|newest]
Thread overview: 69+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-02 19:33 [PATCH 00/33] userfaultfd tmpfs/hugetlbfs/non-cooperative Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 01/33] userfaultfd: document _IOR/_IOW Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 02/33] userfaultfd: correct comment about UFFD_FEATURE_PAGEFAULT_FLAG_WP Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 03/33] userfaultfd: convert BUG() to WARN_ON_ONCE() Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 04/33] userfaultfd: use vma_is_anonymous Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 05/33] userfaultfd: non-cooperative: Split the find_userfault() routine Andrea Arcangeli
2016-11-02 19:33 ` Andrea Arcangeli [this message]
2016-11-02 19:33 ` [PATCH 07/33] userfaultfd: non-cooperative: report all available features to userland Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 08/33] userfaultfd: non-cooperative: Add fork() event Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 09/33] userfaultfd: non-cooperative: Add fork() event, build warning fix Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 10/33] userfaultfd: non-cooperative: dup_userfaultfd: use mm_count instead of mm_users Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 11/33] userfaultfd: non-cooperative: Add mremap() event Andrea Arcangeli
2016-11-03 7:41 ` Hillf Danton
2016-11-03 17:52 ` Mike Rapoport
2016-11-04 15:40 ` Mike Rapoport
2016-11-02 19:33 ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED request Andrea Arcangeli
2016-11-03 8:01 ` Hillf Danton
2016-11-03 17:24 ` Mike Rapoport
2016-11-04 16:40 ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED requestg Andrea Arcangeli
2016-11-04 15:42 ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED request Mike Rapoport
2016-11-02 19:33 ` [PATCH 13/33] userfaultfd: hugetlbfs: add copy_huge_page_from_user for hugetlb userfaultfd support Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 14/33] userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for " Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 15/33] userfaultfd: hugetlbfs: add __mcopy_atomic_hugetlb for huge page UFFDIO_COPY Andrea Arcangeli
2016-11-03 10:15 ` Hillf Danton
2016-11-03 17:33 ` Mike Kravetz
2016-11-03 19:14 ` Mike Kravetz
2016-11-04 6:43 ` Hillf Danton
2016-11-04 19:36 ` Andrea Arcangeli
2016-11-04 20:34 ` Mike Kravetz
2016-11-08 21:06 ` Mike Kravetz
2016-11-16 18:28 ` Andrea Arcangeli
2016-11-16 18:53 ` Mike Kravetz
2016-11-17 15:40 ` Andrea Arcangeli
2016-11-17 19:26 ` Mike Kravetz
2016-11-18 0:05 ` Andrea Arcangeli
2016-11-18 5:52 ` Mike Kravetz
2016-11-22 1:16 ` Mike Kravetz
2016-11-23 6:38 ` Hillf Danton
2016-12-15 19:02 ` Andrea Arcangeli
2016-12-16 3:54 ` Hillf Danton
2016-11-17 19:41 ` Mike Kravetz
2016-11-04 16:35 ` Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 16/33] userfaultfd: hugetlbfs: add userfaultfd hugetlb hook Andrea Arcangeli
2016-11-04 7:02 ` Hillf Danton
2016-11-02 19:33 ` [PATCH 17/33] userfaultfd: hugetlbfs: allow registration of ranges containing huge pages Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 18/33] userfaultfd: hugetlbfs: add userfaultfd_hugetlb test Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 19/33] userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 20/33] userfaultfd: introduce vma_can_userfault Andrea Arcangeli
2016-11-04 7:39 ` Hillf Danton
2016-11-02 19:33 ` [PATCH 21/33] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 22/33] userfaultfd: shmem: introduce vma_is_shmem Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 23/33] userfaultfd: shmem: add tlbflush.h header for microblaze Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 24/33] userfaultfd: shmem: use shmem_mcopy_atomic_pte for shared memory Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 25/33] userfaultfd: shmem: add userfaultfd hook for shared memory faults Andrea Arcangeli
2016-11-04 8:59 ` Hillf Danton
2016-11-04 14:53 ` Mike Rapoport
2016-11-04 15:44 ` Mike Rapoport
2016-11-04 16:56 ` Andrea Arcangeli
2016-11-18 0:37 ` Andrea Arcangeli
2016-11-20 12:10 ` Mike Rapoport
2016-11-02 19:33 ` [PATCH 26/33] userfaultfd: shmem: allow registration of shared memory ranges Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 27/33] userfaultfd: shmem: add userfaultfd_shmem test Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 28/33] userfaultfd: shmem: lock the page before adding it to pagecache Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 29/33] userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 30/33] userfaultfd: non-cooperative: selftest: introduce userfaultfd_open Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 31/33] userfaultfd: non-cooperative: selftest: add ufd parameter to copy_page Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 32/33] userfaultfd: non-cooperative: selftest: add test for FORK, MADVDONTNEED and REMAP events Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 33/33] mm: mprotect: use pmd_trans_unstable instead of taking the pmd_lock Andrea Arcangeli
2016-11-02 20:07 ` [PATCH 00/33] userfaultfd tmpfs/hugetlbfs/non-cooperative Andrea Arcangeli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1478115245-32090-7-git-send-email-aarcange@redhat.com \
--to=aarcange@redhat.com \
--cc=" <dgilbert@redhat.com>, Mike Kravetz <mike.kravetz@oracle.com>, Shaohua Li <shli@fb.com>, Pavel Emelyanov <xemul@parallels.com>"@v2.random \
--cc="Dr. David Alan Gilbert"@v2.random \
--cc=RAPOPORT@il.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).