linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: Jens Axboe <axboe@kernel.dk>, Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 0/2] Optimise io_uring completion waiting
Date: Tue, 24 Sep 2019 20:33:26 +0300	[thread overview]
Message-ID: <79fa9cc2-e0cc-922f-89d3-9ace59abb2e8@gmail.com> (raw)
In-Reply-To: <a0a0cddf-c5ae-43b0-5445-0bd55e4b7c45@kernel.dk>


[-- Attachment #1.1: Type: text/plain, Size: 3668 bytes --]

On 24/09/2019 16:13, Jens Axboe wrote:
> On 9/24/19 5:23 AM, Pavel Begunkov wrote:
>>> Yep that should do it, and saves 8 bytes of stack as well.
>>>
>>> BTW, did you test my patch, this one or the previous? Just curious if it
>>> worked for you.
>>>
>> Not yet, going to do that tonight
> 
> Thanks! For reference, the final version is below. There was still a
> signal mishap in there, now it should all be correct afaict.
> 
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 9b84232e5cc4..d2a86164d520 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2768,6 +2768,38 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
>  	return submit;
>  }
>  
> +struct io_wait_queue {
> +	struct wait_queue_entry wq;
> +	struct io_ring_ctx *ctx;
> +	unsigned to_wait;
> +	unsigned nr_timeouts;
> +};
> +
> +static inline bool io_should_wake(struct io_wait_queue *iowq)
> +{
> +	struct io_ring_ctx *ctx = iowq->ctx;
> +
> +	/*
> +	 * Wake up if we have enough events, or if a timeout occured since we
> +	 * started waiting. For timeouts, we always want to return to userspace,
> +	 * regardless of event count.
> +	 */
> +	return io_cqring_events(ctx->rings) >= iowq->to_wait ||
> +			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
> +}
> +
> +static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
> +			    int wake_flags, void *key)
> +{
> +	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
> +							wq);
> +
> +	if (!io_should_wake(iowq))
> +		return -1;

It would try to schedule only the first task in the wait list. Is that the
semantic you want?
E.g. for waiters=[32,8] and nr_events == 8, io_wake_function() returns
after @32, and won't wake up the second one. 

> +
> +	return autoremove_wake_function(curr, mode, wake_flags, key);
> +}
> +
>  /*
>   * Wait until events become available, if we don't already have some. The
>   * application must reap them itself, as they reside on the shared cq ring.
> @@ -2775,8 +2807,16 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
>  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  			  const sigset_t __user *sig, size_t sigsz)
>  {
> +	struct io_wait_queue iowq = {
> +		.wq = {
> +			.private	= current,
> +			.func		= io_wake_function,
> +			.entry		= LIST_HEAD_INIT(iowq.wq.entry),
> +		},
> +		.ctx		= ctx,
> +		.to_wait	= min_events,
> +	};
>  	struct io_rings *rings = ctx->rings;
> -	unsigned nr_timeouts;
>  	int ret;
>  
>  	if (io_cqring_events(rings) >= min_events)
> @@ -2795,15 +2835,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  			return ret;
>  	}
>  
> -	nr_timeouts = atomic_read(&ctx->cq_timeouts);
> -	/*
> -	 * Return if we have enough events, or if a timeout occured since
> -	 * we started waiting. For timeouts, we always want to return to
> -	 * userspace.
> -	 */
> -	ret = wait_event_interruptible(ctx->wait,
> -				io_cqring_events(rings) >= min_events ||
> -				atomic_read(&ctx->cq_timeouts) != nr_timeouts);
> +	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
> +	do {
> +		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
> +						TASK_INTERRUPTIBLE);
> +		if (io_should_wake(&iowq))
> +			break;
> +		schedule();
> +		if (signal_pending(current)) {
> +			ret = -ERESTARTSYS;
> +			break;
> +		}
> +	} while (1);
> +	finish_wait(&ctx->wait, &iowq.wq);
> +
>  	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
>  	if (ret == -ERESTARTSYS)
>  		ret = -EINTR;
> 

-- 
Yours sincerely,
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2019-09-24 17:33 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-22  8:08 [PATCH v2 0/2] Optimise io_uring completion waiting Pavel Begunkov (Silence)
2019-09-22  8:08 ` [PATCH v2 1/2] sched/wait: Add wait_threshold Pavel Begunkov (Silence)
2019-09-23  7:19   ` Peter Zijlstra
2019-09-23 16:37     ` Pavel Begunkov
2019-09-23 19:27       ` Peter Zijlstra
2019-09-23 20:23         ` Peter Zijlstra
2019-09-24  6:44         ` Pavel Begunkov
2019-09-22  8:08 ` [PATCH v2 2/2] io_uring: Optimise cq waiting with wait_threshold Pavel Begunkov (Silence)
2019-09-22 15:51 ` [PATCH v2 0/2] Optimise io_uring completion waiting Jens Axboe
2019-09-23  8:35   ` Ingo Molnar
2019-09-23 16:21     ` Pavel Begunkov
2019-09-23 16:32       ` Pavel Begunkov
2019-09-23 20:48         ` Jens Axboe
2019-09-23 23:00           ` Jens Axboe
2019-09-24  7:06             ` Pavel Begunkov
2019-09-24  8:02               ` Jens Axboe
2019-09-24  8:27                 ` Jens Axboe
2019-09-24  8:36                   ` Jens Axboe
2019-09-24  9:33                     ` Pavel Begunkov
2019-09-24 10:11                       ` Jens Axboe
2019-09-24  9:49                     ` Peter Zijlstra
2019-09-24 10:13                       ` Jens Axboe
2019-09-24 10:34                         ` Jens Axboe
2019-09-24 11:11                           ` Pavel Begunkov
2019-09-24 11:15                             ` Jens Axboe
2019-09-24 11:23                               ` Pavel Begunkov
2019-09-24 13:13                                 ` Jens Axboe
2019-09-24 17:33                                   ` Pavel Begunkov [this message]
2019-09-24 17:46                                     ` Jens Axboe
2019-09-24 18:28                                       ` Pavel Begunkov
2019-09-24 19:32                                         ` Jens Axboe
2019-09-24 11:43                             ` Peter Zijlstra
2019-09-24 12:57                               ` Jens Axboe
2019-09-24 11:33                           ` Peter Zijlstra
2019-09-24  9:20                   ` Pavel Begunkov
2019-09-24 10:09                     ` Jens Axboe
2019-09-24  9:21                 ` Pavel Begunkov
2019-09-24 10:09                   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=79fa9cc2-e0cc-922f-89d3-9ace59abb2e8@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).