linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Mike Rapoport" <rppt@linux.vnet.ibm.com>
To: Andrea Arcangeli <aarcange@redhat.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>,
	linux-mm <linux-mm@kvack.org>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>
Subject: [RFC PATCH 4/5] userfaultfd: non-cooperative: use fault_pending_wqh for all events
Date: Tue, 16 May 2017 13:36:01 +0300	[thread overview]
Message-ID: <1494930962-3318-5-git-send-email-rppt@linux.vnet.ibm.com> (raw)
In-Reply-To: <1494930962-3318-1-git-send-email-rppt@linux.vnet.ibm.com>

Queuing page faults and non-cooperative events into different wait queues
does not have real value but rather makes the code more complicated.

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
---
 fs/userfaultfd.c | 64 +++++++++++++++++++++-----------------------------------
 1 file changed, 24 insertions(+), 40 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1bd772a..8868229 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -48,8 +48,6 @@ struct userfaultfd_ctx {
 	wait_queue_head_t fault_wqh;
 	/* waitqueue head for the pseudo fd to wakeup poll/read */
 	wait_queue_head_t fd_wqh;
-	/* waitqueue head for events */
-	wait_queue_head_t event_wqh;
 	/* a refile sequence protected by fault_pending_wqh lock */
 	struct seqcount refile_seq;
 	/* pseudo fd refcounting */
@@ -101,6 +99,9 @@ struct userfaultfd_wake_key {
 static bool userfaultfd_should_wake(struct userfaultfd_wait_queue *uwq,
 				    struct userfaultfd_wake_key *key)
 {
+	if (key->event != uwq->msg.event)
+		return false;
+
 	if (key->event == UFFD_EVENT_PAGEFAULT) {
 		unsigned long start, len, address;
 
@@ -188,8 +189,6 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
 		VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
 		VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
 		VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
-		VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
-		VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
 		VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
 		VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
 		mmdrop(ctx->mm);
@@ -560,22 +559,21 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	if (WARN_ON_ONCE(current->flags & PF_EXITING))
 		goto out;
 
-	ewq->ctx = ctx;
-	init_waitqueue_entry(&ewq->wq, current);
+	userfaultfd_init_waitqueue(ctx, ewq);
 
-	spin_lock(&ctx->event_wqh.lock);
+	spin_lock(&ctx->fault_pending_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
 	 * through poll/read().
 	 */
-	__add_wait_queue(&ctx->event_wqh, &ewq->wq);
+	__add_wait_queue(&ctx->fault_pending_wqh, &ewq->wq);
 	for (;;) {
 		set_current_state(TASK_KILLABLE);
-		if (ewq->msg.event == 0)
+		if (READ_ONCE(ewq->waken))
 			break;
 		if (ACCESS_ONCE(ctx->released) ||
 		    fatal_signal_pending(current)) {
-			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+			__remove_wait_queue(&ctx->fault_pending_wqh, &ewq->wq);
 			if (ewq->msg.event == UFFD_EVENT_FORK) {
 				struct userfaultfd_ctx *new;
 
@@ -588,15 +586,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 			break;
 		}
 
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock(&ctx->fault_pending_wqh.lock);
 
 		wake_up_poll(&ctx->fd_wqh, POLLIN);
 		schedule();
 
-		spin_lock(&ctx->event_wqh.lock);
+		spin_lock(&ctx->fault_pending_wqh.lock);
 	}
 	__set_current_state(TASK_RUNNING);
-	spin_unlock(&ctx->event_wqh.lock);
+	spin_unlock(&ctx->fault_pending_wqh.lock);
 
 	/*
 	 * ctx may go away after this if the userfault pseudo fd is
@@ -609,9 +607,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
 				       struct userfaultfd_wait_queue *ewq)
 {
-	ewq->msg.event = 0;
-	wake_up_locked(&ctx->event_wqh);
-	__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+	struct userfaultfd_wake_key key = { 0 };
+
+	key.event = ewq->msg.event;
+	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &key);
 }
 
 int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
@@ -898,12 +897,6 @@ static inline struct userfaultfd_wait_queue *find_userfault(
 	return find_userfault_in(&ctx->fault_pending_wqh);
 }
 
-static inline struct userfaultfd_wait_queue *find_userfault_evt(
-		struct userfaultfd_ctx *ctx)
-{
-	return find_userfault_in(&ctx->event_wqh);
-}
-
 static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
@@ -935,8 +928,6 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
 		smp_mb();
 		if (waitqueue_active(&ctx->fault_pending_wqh))
 			ret = POLLIN;
-		else if (waitqueue_active(&ctx->event_wqh))
-			ret = POLLIN;
 
 		return ret;
 	default:
@@ -981,7 +972,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 	struct userfaultfd_wait_queue *uwq;
 	/*
 	 * Handling fork event requires sleeping operations, so
-	 * we drop the event_wqh lock, then do these ops, then
+	 * we drop the fault_pending_wqh lock, then do these ops, then
 	 * lock it back and wake up the waiter. While the lock is
 	 * dropped the ewq may go away so we keep track of it
 	 * carefully.
@@ -996,7 +987,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 		set_current_state(TASK_INTERRUPTIBLE);
 		spin_lock(&ctx->fault_pending_wqh.lock);
 		uwq = find_userfault(ctx);
-		if (uwq) {
+		if (uwq && uwq->msg.event == UFFD_EVENT_PAGEFAULT) {
 			/*
 			 * Use a seqcount to repeat the lockless check
 			 * in wake_userfault() to avoid missing
@@ -1037,12 +1028,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			spin_unlock(&ctx->fault_pending_wqh.lock);
 			ret = 0;
 			break;
-		}
-		spin_unlock(&ctx->fault_pending_wqh.lock);
-
-		spin_lock(&ctx->event_wqh.lock);
-		uwq = find_userfault_evt(ctx);
-		if (uwq) {
+		} else if (uwq) { /* non-pagefault event */
 			*msg = uwq->msg;
 
 			if (uwq->msg.event == UFFD_EVENT_FORK) {
@@ -1050,17 +1036,16 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 					(unsigned long)
 					uwq->msg.arg.reserved.reserved1;
 				list_move(&uwq->wq.task_list, &fork_event);
-				spin_unlock(&ctx->event_wqh.lock);
+				spin_unlock(&ctx->fault_pending_wqh.lock);
 				ret = 0;
 				break;
 			}
-
 			userfaultfd_event_complete(ctx, uwq);
-			spin_unlock(&ctx->event_wqh.lock);
+			spin_unlock(&ctx->fault_pending_wqh.lock);
 			ret = 0;
 			break;
 		}
-		spin_unlock(&ctx->event_wqh.lock);
+		spin_unlock(&ctx->fault_pending_wqh.lock);
 
 		if (signal_pending(current)) {
 			ret = -ERESTARTSYS;
@@ -1082,16 +1067,16 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 		ret = resolve_userfault_fork(ctx, fork_nctx, msg);
 
 		if (!ret) {
-			spin_lock(&ctx->event_wqh.lock);
+			spin_lock(&ctx->fault_pending_wqh.lock);
 			if (!list_empty(&fork_event)) {
 				uwq = list_first_entry(&fork_event,
 						       typeof(*uwq),
 						       wq.task_list);
 				list_del(&uwq->wq.task_list);
-				__add_wait_queue(&ctx->event_wqh, &uwq->wq);
+				__add_wait_queue(&ctx->fault_pending_wqh, &uwq->wq);
 				userfaultfd_event_complete(ctx, uwq);
 			}
-			spin_unlock(&ctx->event_wqh.lock);
+			spin_unlock(&ctx->fault_pending_wqh.lock);
 		}
 	}
 
@@ -1808,7 +1793,6 @@ static void init_once_userfaultfd_ctx(void *mem)
 
 	init_waitqueue_head(&ctx->fault_pending_wqh);
 	init_waitqueue_head(&ctx->fault_wqh);
-	init_waitqueue_head(&ctx->event_wqh);
 	init_waitqueue_head(&ctx->fd_wqh);
 	seqcount_init(&ctx->refile_seq);
 }
-- 
2.7.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2017-05-16 10:36 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-16 10:35 [RFC PATCH 0/5] userfaultfd: non-cooperative: syncronous events Mike Rapoport
2017-05-16 10:35 ` [RFC PATCH 1/5] userfaultfd: introduce userfault_init_waitqueue helper Mike Rapoport
2017-05-16 10:35 ` [RFC PATCH 2/5] userfaultfd: introduce userfaultfd_should_wait helper Mike Rapoport
2017-05-16 10:36 ` [RFC PATCH 3/5] userfaultfd: non-cooperative: generalize wake key structure Mike Rapoport
2017-05-16 10:36 ` Mike Rapoport [this message]
2017-05-16 10:36 ` [RFC PATCH 5/5] userfaultfd: non-cooperative: allow synchronous EVENT_REMOVE Mike Rapoport
2017-06-27 13:39 ` [RFC PATCH 0/5] userfaultfd: non-cooperative: syncronous events Mike Rapoport
2017-08-11 13:46   ` Blake Caldwell
2017-08-14  4:58     ` Mike Rapoport

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1494930962-3318-5-git-send-email-rppt@linux.vnet.ibm.com \
    --to=rppt@linux.vnet.ibm.com \
    --cc=aarcange@redhat.com \
    --cc=linux-mm@kvack.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).