From: Jens Axboe <axboe@kernel.dk>
To: io-uring@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 6/8] io_uring: split SQPOLL data into separate structure
Date: Wed, 2 Sep 2020 20:20:51 -0600 [thread overview]
Message-ID: <20200903022053.912968-7-axboe@kernel.dk> (raw)
In-Reply-To: <20200903022053.912968-1-axboe@kernel.dk>
Move all the necessary state out of io_ring_ctx, and into a new
structure, io_sq_data. The latter now deals with any state or
variables associated with the SQPOLL thread itself.
This is in preparation for supporting more than one io_ring_ctx per
SQPOLL thread.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/io_uring.c | 104 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 73 insertions(+), 31 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8ce1b4247120..35ea69aad9c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -229,6 +229,12 @@ struct io_restriction {
bool registered;
};
+struct io_sq_data {
+ refcount_t refs;
+ struct task_struct *thread;
+ struct wait_queue_head wait;
+};
+
struct io_ring_ctx {
struct {
struct percpu_ref refs;
@@ -275,13 +281,7 @@ struct io_ring_ctx {
/* IO offload */
struct io_wq *io_wq;
- struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm;
- struct wait_queue_head *sqo_wait;
- struct wait_queue_head __sqo_wait;
- struct wait_queue_entry sqo_wait_entry;
-
-
/*
* For SQPOLL usage - no reference is held to this file table, we
* rely on fops->flush() and our callback there waiting for the users
@@ -289,6 +289,10 @@ struct io_ring_ctx {
*/
struct files_struct *sqo_files;
+ struct wait_queue_entry sqo_wait_entry;
+
+ struct io_sq_data *sq_data; /* if using sq thread polling */
+
/*
* If used, fixed file set. Writers must ensure that ->refs is dead,
* readers must ensure that ->refs is alive as long as the file* is
@@ -1086,8 +1090,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
goto err;
ctx->flags = p->flags;
- init_waitqueue_head(&ctx->__sqo_wait);
- ctx->sqo_wait = &ctx->__sqo_wait;
init_waitqueue_head(&ctx->cq_wait);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->ref_comp);
@@ -1350,8 +1352,8 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- if (waitqueue_active(ctx->sqo_wait))
- wake_up(ctx->sqo_wait);
+ if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
+ wake_up(&ctx->sq_data->wait);
if (io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
}
@@ -2415,8 +2417,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
else
list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
- if ((ctx->flags & IORING_SETUP_SQPOLL) && wq_has_sleeper(ctx->sqo_wait))
- wake_up(ctx->sqo_wait);
+ if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+ wq_has_sleeper(&ctx->sq_data->wait))
+ wake_up(&ctx->sq_data->wait);
}
static void __io_state_file_put(struct io_submit_state *state)
@@ -6635,6 +6638,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
unsigned long start_jiffies)
{
unsigned long timeout = start_jiffies + ctx->sq_thread_idle;
+ struct io_sq_data *sqd = ctx->sq_data;
unsigned int to_submit;
int ret = 0;
@@ -6675,7 +6679,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
!percpu_ref_is_dying(&ctx->refs)))
return SQT_SPIN;
- prepare_to_wait(ctx->sqo_wait, &ctx->sqo_wait_entry,
+ prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry,
TASK_INTERRUPTIBLE);
/*
@@ -6687,7 +6691,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
*/
if ((ctx->flags & IORING_SETUP_IOPOLL) &&
!list_empty_careful(&ctx->iopoll_list)) {
- finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+ finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
goto again;
}
@@ -6697,7 +6701,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
if (!to_submit || ret == -EBUSY)
return SQT_IDLE;
- finish_wait(ctx->sqo_wait, &ctx->sqo_wait_entry);
+ finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
io_ring_clear_wakeup_flag(ctx);
}
@@ -6925,18 +6929,46 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
return 0;
}
-static void io_sq_thread_stop(struct io_ring_ctx *ctx)
+static void io_put_sq_data(struct io_sq_data *sqd)
{
- if (ctx->sqo_thread) {
- wait_for_completion(&ctx->sq_thread_comp);
+ if (refcount_dec_and_test(&sqd->refs)) {
/*
* The park is a bit of a work-around, without it we get
* warning spews on shutdown with SQPOLL set and affinity
* set to a single CPU.
*/
- kthread_park(ctx->sqo_thread);
- kthread_stop(ctx->sqo_thread);
- ctx->sqo_thread = NULL;
+ if (sqd->thread) {
+ kthread_park(sqd->thread);
+ kthread_stop(sqd->thread);
+ }
+
+ kfree(sqd);
+ }
+}
+
+static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
+{
+ struct io_sq_data *sqd;
+
+ sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
+ if (!sqd)
+ return ERR_PTR(-ENOMEM);
+
+ refcount_set(&sqd->refs, 1);
+ init_waitqueue_head(&sqd->wait);
+ return sqd;
+}
+
+static void io_sq_thread_stop(struct io_ring_ctx *ctx)
+{
+ struct io_sq_data *sqd = ctx->sq_data;
+
+ if (sqd) {
+ if (sqd->thread)
+ wait_for_completion(&ctx->sq_thread_comp);
+
+ io_put_sq_data(sqd);
+ ctx->sq_data = NULL;
}
}
@@ -7576,10 +7608,19 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
int ret;
if (ctx->flags & IORING_SETUP_SQPOLL) {
+ struct io_sq_data *sqd;
+
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
goto err;
+ sqd = io_get_sq_data(p);
+ if (IS_ERR(sqd)) {
+ ret = PTR_ERR(sqd);
+ goto err;
+ }
+ ctx->sq_data = sqd;
+
/*
* We will exit the sqthread before current exits, so we can
* avoid taking a reference here and introducing weird
@@ -7600,16 +7641,15 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
if (!cpu_online(cpu))
goto err;
- ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
- ctx, cpu,
- "io_uring-sq");
+ sqd->thread = kthread_create_on_cpu(io_sq_thread, ctx,
+ cpu, "io_uring-sq");
} else {
- ctx->sqo_thread = kthread_create(io_sq_thread, ctx,
+ sqd->thread = kthread_create(io_sq_thread, ctx,
"io_uring-sq");
}
- if (IS_ERR(ctx->sqo_thread)) {
- ret = PTR_ERR(ctx->sqo_thread);
- ctx->sqo_thread = NULL;
+ if (IS_ERR(sqd->thread)) {
+ ret = PTR_ERR(sqd->thread);
+ sqd->thread = NULL;
goto err;
}
} else if (p->flags & IORING_SETUP_SQ_AFF) {
@@ -7631,8 +7671,10 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
static void io_sq_offload_start(struct io_ring_ctx *ctx)
{
- if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread)
- wake_up_process(ctx->sqo_thread);
+ struct io_sq_data *sqd = ctx->sq_data;
+
+ if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread)
+ wake_up_process(sqd->thread);
}
static inline void __io_unaccount_mem(struct user_struct *user,
@@ -8396,7 +8438,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
if (!list_empty_careful(&ctx->cq_overflow_list))
io_cqring_overflow_flush(ctx, false);
if (flags & IORING_ENTER_SQ_WAKEUP)
- wake_up(ctx->sqo_wait);
+ wake_up(&ctx->sq_data->wait);
submitted = to_submit;
} else if (to_submit) {
mutex_lock(&ctx->uring_lock);
--
2.28.0
next prev parent reply other threads:[~2020-09-03 2:21 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-03 2:20 [PATCHSET for-next 0/8] io_uring SQPOLL improvements Jens Axboe
2020-09-03 2:20 ` [PATCH 1/8] io_uring: io_sq_thread() doesn't need to flush signals Jens Axboe
2020-09-03 2:20 ` [PATCH 2/8] io_uring: allow SQPOLL with CAP_SYS_NICE privileges Jens Axboe
2020-09-03 2:20 ` [PATCH 3/8] io_uring: use private ctx wait queue entries for SQPOLL Jens Axboe
2020-09-03 2:20 ` [PATCH 4/8] io_uring: move SQPOLL post-wakeup ring need wakeup flag into wake handler Jens Axboe
2020-09-03 2:20 ` [PATCH 5/8] io_uring: split work handling part of SQPOLL into helper Jens Axboe
2020-09-03 2:20 ` Jens Axboe [this message]
2020-09-03 2:20 ` [PATCH 7/8] io_uring: base SQPOLL handling off io_sq_data Jens Axboe
2020-09-03 2:20 ` [PATCH 8/8] io_uring: enable IORING_SETUP_ATTACH_WQ to attach to SQPOLL thread too Jens Axboe
2020-09-07 8:56 ` Xiaoguang Wang
2020-09-07 14:00 ` Pavel Begunkov
2020-09-07 16:11 ` Jens Axboe
2020-09-07 16:14 ` Jens Axboe
2020-09-07 16:18 ` Jens Axboe
2020-09-08 2:28 ` Xiaoguang Wang
2020-09-08 2:53 ` Xiaoguang Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200903022053.912968-7-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=io-uring@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.