All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Pavel Begunkov <asml.silence@gmail.com>, io-uring@vger.kernel.org
Subject: Re: [PATCH 1/6] io_uring: expand main struct io_kiocb flags to 64-bits
Date: Tue, 6 Feb 2024 19:18:47 -0700	[thread overview]
Message-ID: <6f55dbd7-62a3-48d0-bc5a-2ddddb69e9ac@kernel.dk> (raw)
In-Reply-To: <f4e5bd14-2550-4683-bdc3-7521351f81e1@gmail.com>

On 2/6/24 5:43 PM, Pavel Begunkov wrote:
> On 2/6/24 16:22, Jens Axboe wrote:
>> We're out of space here, and none of the flags are easily reclaimable.
>> Bump it to 64-bits and re-arrange the struct a bit to avoid gaps.
>>
>> Add a specific bitwise type for the request flags, io_req_flags_t.
>> This will help catch violations of casting this value to a smaller type
>> on 32-bit archs, like unsigned int.
>>
>> No functional changes intended in this patch.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>   include/linux/io_uring_types.h  | 87 ++++++++++++++++++---------------
>>   include/trace/events/io_uring.h | 14 +++---
>>   io_uring/filetable.h            |  2 +-
>>   io_uring/io_uring.c             |  9 ++--
>>   4 files changed, 60 insertions(+), 52 deletions(-)
>>
>> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
>> index 854ad67a5f70..5ac18b05d4ee 100644
>> --- a/include/linux/io_uring_types.h
>> +++ b/include/linux/io_uring_types.h
>> @@ -428,7 +428,7 @@ struct io_tw_state {
>>       bool locked;
>>   };
>>   -enum {
>> +enum io_req_flags {
>>       REQ_F_FIXED_FILE_BIT    = IOSQE_FIXED_FILE_BIT,
>>       REQ_F_IO_DRAIN_BIT    = IOSQE_IO_DRAIN_BIT,
>>       REQ_F_LINK_BIT        = IOSQE_IO_LINK_BIT,
>> @@ -468,70 +468,73 @@ enum {
>>       __REQ_F_LAST_BIT,
>>   };
>>   +typedef enum io_req_flags __bitwise io_req_flags_t;
>> +#define IO_REQ_FLAG(bitno)    ((__force io_req_flags_t) BIT_ULL((bitno)))
>> +
>>   enum {
>>       /* ctx owns file */
>> -    REQ_F_FIXED_FILE    = BIT(REQ_F_FIXED_FILE_BIT),
>> +    REQ_F_FIXED_FILE    = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT),
>>       /* drain existing IO first */
>> -    REQ_F_IO_DRAIN        = BIT(REQ_F_IO_DRAIN_BIT),
>> +    REQ_F_IO_DRAIN        = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT),
>>       /* linked sqes */
>> -    REQ_F_LINK        = BIT(REQ_F_LINK_BIT),
>> +    REQ_F_LINK        = IO_REQ_FLAG(REQ_F_LINK_BIT),
>>       /* doesn't sever on completion < 0 */
>> -    REQ_F_HARDLINK        = BIT(REQ_F_HARDLINK_BIT),
>> +    REQ_F_HARDLINK        = IO_REQ_FLAG(REQ_F_HARDLINK_BIT),
>>       /* IOSQE_ASYNC */
>> -    REQ_F_FORCE_ASYNC    = BIT(REQ_F_FORCE_ASYNC_BIT),
>> +    REQ_F_FORCE_ASYNC    = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT),
>>       /* IOSQE_BUFFER_SELECT */
>> -    REQ_F_BUFFER_SELECT    = BIT(REQ_F_BUFFER_SELECT_BIT),
>> +    REQ_F_BUFFER_SELECT    = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT),
>>       /* IOSQE_CQE_SKIP_SUCCESS */
>> -    REQ_F_CQE_SKIP        = BIT(REQ_F_CQE_SKIP_BIT),
>> +    REQ_F_CQE_SKIP        = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT),
>>         /* fail rest of links */
>> -    REQ_F_FAIL        = BIT(REQ_F_FAIL_BIT),
>> +    REQ_F_FAIL        = IO_REQ_FLAG(REQ_F_FAIL_BIT),
>>       /* on inflight list, should be cancelled and waited on exit reliably */
>> -    REQ_F_INFLIGHT        = BIT(REQ_F_INFLIGHT_BIT),
>> +    REQ_F_INFLIGHT        = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT),
>>       /* read/write uses file position */
>> -    REQ_F_CUR_POS        = BIT(REQ_F_CUR_POS_BIT),
>> +    REQ_F_CUR_POS        = IO_REQ_FLAG(REQ_F_CUR_POS_BIT),
>>       /* must not punt to workers */
>> -    REQ_F_NOWAIT        = BIT(REQ_F_NOWAIT_BIT),
>> +    REQ_F_NOWAIT        = IO_REQ_FLAG(REQ_F_NOWAIT_BIT),
>>       /* has or had linked timeout */
>> -    REQ_F_LINK_TIMEOUT    = BIT(REQ_F_LINK_TIMEOUT_BIT),
>> +    REQ_F_LINK_TIMEOUT    = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT),
>>       /* needs cleanup */
>> -    REQ_F_NEED_CLEANUP    = BIT(REQ_F_NEED_CLEANUP_BIT),
>> +    REQ_F_NEED_CLEANUP    = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
>>       /* already went through poll handler */
>> -    REQ_F_POLLED        = BIT(REQ_F_POLLED_BIT),
>> +    REQ_F_POLLED        = IO_REQ_FLAG(REQ_F_POLLED_BIT),
>>       /* buffer already selected */
>> -    REQ_F_BUFFER_SELECTED    = BIT(REQ_F_BUFFER_SELECTED_BIT),
>> +    REQ_F_BUFFER_SELECTED    = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
>>       /* buffer selected from ring, needs commit */
>> -    REQ_F_BUFFER_RING    = BIT(REQ_F_BUFFER_RING_BIT),
>> +    REQ_F_BUFFER_RING    = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT),
>>       /* caller should reissue async */
>> -    REQ_F_REISSUE        = BIT(REQ_F_REISSUE_BIT),
>> +    REQ_F_REISSUE        = IO_REQ_FLAG(REQ_F_REISSUE_BIT),
>>       /* supports async reads/writes */
>> -    REQ_F_SUPPORT_NOWAIT    = BIT(REQ_F_SUPPORT_NOWAIT_BIT),
>> +    REQ_F_SUPPORT_NOWAIT    = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT),
>>       /* regular file */
>> -    REQ_F_ISREG        = BIT(REQ_F_ISREG_BIT),
>> +    REQ_F_ISREG        = IO_REQ_FLAG(REQ_F_ISREG_BIT),
>>       /* has creds assigned */
>> -    REQ_F_CREDS        = BIT(REQ_F_CREDS_BIT),
>> +    REQ_F_CREDS        = IO_REQ_FLAG(REQ_F_CREDS_BIT),
>>       /* skip refcounting if not set */
>> -    REQ_F_REFCOUNT        = BIT(REQ_F_REFCOUNT_BIT),
>> +    REQ_F_REFCOUNT        = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT),
>>       /* there is a linked timeout that has to be armed */
>> -    REQ_F_ARM_LTIMEOUT    = BIT(REQ_F_ARM_LTIMEOUT_BIT),
>> +    REQ_F_ARM_LTIMEOUT    = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT),
>>       /* ->async_data allocated */
>> -    REQ_F_ASYNC_DATA    = BIT(REQ_F_ASYNC_DATA_BIT),
>> +    REQ_F_ASYNC_DATA    = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT),
>>       /* don't post CQEs while failing linked requests */
>> -    REQ_F_SKIP_LINK_CQES    = BIT(REQ_F_SKIP_LINK_CQES_BIT),
>> +    REQ_F_SKIP_LINK_CQES    = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT),
>>       /* single poll may be active */
>> -    REQ_F_SINGLE_POLL    = BIT(REQ_F_SINGLE_POLL_BIT),
>> +    REQ_F_SINGLE_POLL    = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT),
>>       /* double poll may active */
>> -    REQ_F_DOUBLE_POLL    = BIT(REQ_F_DOUBLE_POLL_BIT),
>> +    REQ_F_DOUBLE_POLL    = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT),
>>       /* request has already done partial IO */
>> -    REQ_F_PARTIAL_IO    = BIT(REQ_F_PARTIAL_IO_BIT),
>> +    REQ_F_PARTIAL_IO    = IO_REQ_FLAG(REQ_F_PARTIAL_IO_BIT),
>>       /* fast poll multishot mode */
>> -    REQ_F_APOLL_MULTISHOT    = BIT(REQ_F_APOLL_MULTISHOT_BIT),
>> +    REQ_F_APOLL_MULTISHOT    = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
>>       /* recvmsg special flag, clear EPOLLIN */
>> -    REQ_F_CLEAR_POLLIN    = BIT(REQ_F_CLEAR_POLLIN_BIT),
>> +    REQ_F_CLEAR_POLLIN    = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
>>       /* hashed into ->cancel_hash_locked, protected by ->uring_lock */
>> -    REQ_F_HASH_LOCKED    = BIT(REQ_F_HASH_LOCKED_BIT),
>> +    REQ_F_HASH_LOCKED    = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
>>       /* don't use lazy poll wake for this request */
>> -    REQ_F_POLL_NO_LAZY    = BIT(REQ_F_POLL_NO_LAZY_BIT),
>> +    REQ_F_POLL_NO_LAZY    = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
>>   };
>>     typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
>> @@ -592,15 +595,14 @@ struct io_kiocb {
>>        * and after selection it points to the buffer ID itself.
>>        */
>>       u16                buf_index;
>> -    unsigned int            flags;
>>   -    struct io_cqe            cqe;
> 
> With the current layout the min number of lines we touch per
> request is 2 (including the op specific 64B), that includes
> setting up cqe at init and using it for completing. Moving cqe
> down makes it 3.
> 
>> +    atomic_t            refs;
> 
> We're pulling in refs, which is not touched at all in the hot
> path. Even if there's a hole I'd argue it's better to leave it
> at the end.
> 
>> +
>> +    io_req_flags_t            flags;
>>         struct io_ring_ctx        *ctx;
>>       struct task_struct        *task;
>>   -    struct io_rsrc_node        *rsrc_node;
> 
> It's used in hot paths, registered buffers/files, would be
> unfortunate to move it to the next line.

Yep I did feel a bit bad about that one... Let me take another stab at
it.

>> -
>>       union {
>>           /* store used ubuf, so we can prevent reloading */
>>           struct io_mapped_ubuf    *imu;
>> @@ -615,18 +617,23 @@ struct io_kiocb {
>>           struct io_buffer_list    *buf_list;
>>       };
>>   +    /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
>> +    struct hlist_node        hash_node;
>> +
> 
> And we're pulling hash_node into the hottest line, which is
> used only when we arm a poll and remove poll. So, it's mostly
> for networking, sends wouldn't use it much, and multishots
> wouldn't normally touch it.
> 
> As for ideas how to find space:
> 1) iopoll_completed can be converted to flags2

That's a good idea, but won't immediately find any space as it'd just
leave a hole anyway. But would be good to note in there perhaps, you
never know when it needs re-arranging again.

> 2) REQ_F_{SINGLE,DOUBLE}_POLL is a weird duplication. Can
> probably be combined into one flag, or removed altogether.
> Again, sends are usually not so poll heavy and the hot
> path for recv is multishot.

Normal receive is also a hot path, even if multishot should be preferred
in general. Ditto for non-socket but still pollable files, e.g. when
doing a read.

> 3) we can probably move req->task down and replace it with
> 
> get_task() {
>     if (req->ctx->flags & DEFER_TASKRUN)
>         task = ctx->submitter_task;
>     else
>         task = req->task;
> }

Assuming ctx flags is hot, which it would generally be, that's not a bad
idea at all.

I'll do another loop over this one.

-- 
Jens Axboe


  reply	other threads:[~2024-02-07  2:18 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-06 16:22 [PATCHSET next 0/6] Misc cleanups / optimizations Jens Axboe
2024-02-06 16:22 ` [PATCH 1/6] io_uring: expand main struct io_kiocb flags to 64-bits Jens Axboe
2024-02-06 22:58   ` Jens Axboe
2024-02-07  0:43   ` Pavel Begunkov
2024-02-07  2:18     ` Jens Axboe [this message]
2024-02-07  3:22       ` Pavel Begunkov
2024-02-06 16:22 ` [PATCH 2/6] io_uring: add io_file_can_poll() helper Jens Axboe
2024-02-07  0:57   ` Pavel Begunkov
2024-02-07  2:15     ` Jens Axboe
2024-02-07  3:33       ` Pavel Begunkov
2024-02-06 16:22 ` [PATCH 3/6] io_uring/cancel: don't default to setting req->work.cancel_seq Jens Axboe
2024-02-06 16:22 ` [PATCH 4/6] io_uring: move io_kiocb->nr_tw into comp_list union Jens Axboe
2024-02-06 16:22 ` [PATCH 5/6] io_uring: mark the need to lock/unlock the ring as unlikely Jens Axboe
2024-02-06 16:22 ` [PATCH 6/6] io_uring/rw: remove dead file == NULL check Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2024-02-07 17:17 [PATCHSET v2 0/6] Misc cleanups / optimizations Jens Axboe
2024-02-07 17:17 ` [PATCH 1/6] io_uring: expand main struct io_kiocb flags to 64-bits Jens Axboe
2024-02-08 20:08   ` Gabriel Krisman Bertazi
2024-02-08 20:22     ` Jens Axboe
2024-02-08 20:52       ` Gabriel Krisman Bertazi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6f55dbd7-62a3-48d0-bc5a-2ddddb69e9ac@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=asml.silence@gmail.com \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.