linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] io_uring: fix up O_NONBLOCK handling for sockets
@ 2019-10-17 15:24 Jens Axboe
  2019-10-17 20:52 ` Sasha Levin
  2019-10-21  0:23 ` Jackie Liu
  0 siblings, 2 replies; 4+ messages in thread
From: Jens Axboe @ 2019-10-17 15:24 UTC (permalink / raw)
  To: linux-block@vger.kernel.org; +Cc: Hrvoje Zeba

We've got two issues with the non-regular file handling for non-blocking
IO:

1) We don't want to re-do a short read in full for a non-regular file,
   as we can't just read the data again.
2) For non-regular files that don't support non-blocking IO attempts,
   we need to punt to async context even if the file is opened as
   non-blocking. Otherwise the caller always gets -EAGAIN.

Add two new request flags to handle these cases. One is just a cache
of the inode S_ISREG() status, the other tells io_uring that we always
need to punt this request to async context, even if REQ_F_NOWAIT is set.

Cc: stable@vger.kernel.org
Reported-by: Hrvoje Zeba <zeba.hrvoje@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 55 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d2cb277da2f4..a4ee5436cb61 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -322,6 +322,8 @@ struct io_kiocb {
 #define REQ_F_FAIL_LINK		256	/* fail rest of links */
 #define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
 #define REQ_F_TIMEOUT		1024	/* timeout request */
+#define REQ_F_ISREG		2048	/* regular file */
+#define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
 	return ret;
 }
 
-static void kiocb_end_write(struct kiocb *kiocb)
+static void kiocb_end_write(struct io_kiocb *req)
 {
-	if (kiocb->ki_flags & IOCB_WRITE) {
-		struct inode *inode = file_inode(kiocb->ki_filp);
+	/*
+	 * Tell lockdep we inherited freeze protection from submission
+	 * thread.
+	 */
+	if (req->flags & REQ_F_ISREG) {
+		struct inode *inode = file_inode(req->file);
 
-		/*
-		 * Tell lockdep we inherited freeze protection from submission
-		 * thread.
-		 */
-		if (S_ISREG(inode->i_mode))
-			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
-		file_end_write(kiocb->ki_filp);
+		__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
 	}
+	file_end_write(req->file);
 }
 
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
-	kiocb_end_write(kiocb);
+	if (kiocb->ki_flags & IOCB_WRITE)
+		kiocb_end_write(req);
 
 	if ((req->flags & REQ_F_LINK) && res != req->result)
 		req->flags |= REQ_F_FAIL_LINK;
@@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
-	kiocb_end_write(kiocb);
+	if (kiocb->ki_flags & IOCB_WRITE)
+		kiocb_end_write(req);
 
 	if ((req->flags & REQ_F_LINK) && res != req->result)
 		req->flags |= REQ_F_FAIL_LINK;
@@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	if (!req->file)
 		return -EBADF;
 
-	if (force_nonblock && !io_file_supports_async(req->file))
+	if (S_ISREG(file_inode(req->file)->i_mode))
+		req->flags |= REQ_F_ISREG;
+
+	/*
+	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
+	 * we know to async punt it even if it was opened O_NONBLOCK
+	 */
+	if (force_nonblock && !io_file_supports_async(req->file)) {
 		force_nonblock = false;
+		req->flags |= REQ_F_MUST_PUNT;
+	}
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 		return ret;
 
 	/* don't allow async punt if RWF_NOWAIT was requested */
-	if (kiocb->ki_flags & IOCB_NOWAIT)
+	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+	    (req->file->f_flags & O_NONBLOCK))
 		req->flags |= REQ_F_NOWAIT;
 
 	if (force_nonblock)
@@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		 * need async punt anyway, so it's more efficient to do it
 		 * here.
 		 */
-		if (force_nonblock && ret2 > 0 && ret2 < read_size)
+		if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
+		    (req->flags & REQ_F_ISREG) &&
+		    ret2 > 0 && ret2 < read_size)
 			ret2 = -EAGAIN;
 		/* Catch -EAGAIN return for forced non-blocking submission */
 		if (!force_nonblock || ret2 != -EAGAIN) {
@@ -1447,7 +1462,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 		 * released so that it doesn't complain about the held lock when
 		 * we return to userspace.
 		 */
-		if (S_ISREG(file_inode(file)->i_mode)) {
+		if (req->flags & REQ_F_ISREG) {
 			__sb_start_write(file_inode(file)->i_sb,
 						SB_FREEZE_WRITE, true);
 			__sb_writers_release(file_inode(file)->i_sb,
@@ -2282,7 +2297,13 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	int ret;
 
 	ret = __io_submit_sqe(ctx, req, s, force_nonblock);
-	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+
+	/*
+	 * We async punt it if the file wasn't marked NOWAIT, or if the file
+	 * doesn't support non-blocking read/write attempts
+	 */
+	if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
+	    (req->flags & REQ_F_MUST_PUNT))) {
 		struct io_uring_sqe *sqe_copy;
 
 		sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
-- 
2.17.1

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] io_uring: fix up O_NONBLOCK handling for sockets
  2019-10-17 15:24 [PATCH] io_uring: fix up O_NONBLOCK handling for sockets Jens Axboe
@ 2019-10-17 20:52 ` Sasha Levin
  2019-10-21  0:23 ` Jackie Liu
  1 sibling, 0 replies; 4+ messages in thread
From: Sasha Levin @ 2019-10-17 20:52 UTC (permalink / raw)
  To: Sasha Levin, Jens Axboe, linux-block@vger.kernel.org
  Cc: Hrvoje Zeba, stable, stable

Hi,

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all

The bot has tested the following trees: v5.3.6, v4.19.79, v4.14.149, v4.9.196, v4.4.196.

v5.3.6: Failed to apply! Possible dependencies:
    18d9be1a970c3 ("io_uring: add io_queue_async_work() helper")
    4fe2c963154c3 ("io_uring: add support for link with drain")
    5262f567987d3 ("io_uring: IORING_OP_TIMEOUT support")
    75b28affdd6ae ("io_uring: allocate the two rings together")
    c576666863b78 ("io_uring: optimize submit_and_wait API")

v4.19.79: Failed to apply! Possible dependencies:
    2b188cc1bb857 ("Add io_uring IO interface")
    31b515106428b ("io_uring: allow workqueue item to handle multiple buffered requests")
    4e21565b7fd4d ("asm-generic: add kexec_file_load system call to unistd.h")
    5262f567987d3 ("io_uring: IORING_OP_TIMEOUT support")
    6b06314c47e14 ("io_uring: add file set registration")
    9a56a2323dbbd ("io_uring: use fget/fput_many() for file references")
    c992fe2925d77 ("io_uring: add fsync support")
    d530a402a114e ("io_uring: add prepped flag")
    de0617e467171 ("io_uring: add support for marking commands as draining")
    def596e9557c9 ("io_uring: support for IO polling")
    edafccee56ff3 ("io_uring: add support for pre-mapped user IO buffers")

v4.14.149: Failed to apply! Possible dependencies:
    05c17cedf85ba ("x86: Wire up restartable sequence system call")
    1bd21c6c21e84 ("syscalls/core: Introduce CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y")
    2b188cc1bb857 ("Add io_uring IO interface")
    3c1c456f9b96c ("syscalls: sort syscall prototypes in include/linux/syscalls.h")
    4ddb45db30851 ("x86/syscalls: Use COMPAT_SYSCALL_DEFINEx() macros for x86-only compat syscalls")
    5262f567987d3 ("io_uring: IORING_OP_TIMEOUT support")
    5ac9efa3c50d7 ("syscalls/core, syscalls/x86: Clean up compat syscall stub naming convention")
    66f4e88cc69da ("x86/ioport: add ksys_ioperm() helper; remove in-kernel calls to sys_ioperm()")
    7a074e96dee62 ("aio: implement io_pgetevents")
    7c2178c1ff482 ("x86/syscalls: Use proper syscall definition for sys_ioperm()")
    ab0d1e85bfd0c ("fs/quota: use COMPAT_SYSCALL_DEFINE for sys32_quotactl()")
    af52201d99162 ("x86/entry: Do not special-case clone(2) in compat entry")
    b411991e0ca88 ("x86/syscalls/32: Simplify $entry == $compat entries")
    b51d3cdf44d5c ("x86: remove compat_sys_x86_waitpid()")
    d53238cd51a80 ("kernel: open-code sys_rt_sigpending() in sys_sigpending()")
    de0617e467171 ("io_uring: add support for marking commands as draining")
    dfe64506c01e5 ("x86/syscalls: Don't pointlessly reload the system call number")
    e145242ea0df6 ("syscalls/core, syscalls/x86: Clean up syscall stub naming convention")
    ebeb8c82ffaf9 ("syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32")
    fa697140f9a20 ("syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls")

v4.9.196: Failed to apply! Possible dependencies:
    05c17cedf85ba ("x86: Wire up restartable sequence system call")
    2611dc1939569 ("Remove compat_sys_getdents64()")
    2b188cc1bb857 ("Add io_uring IO interface")
    3a404842547c9 ("x86/entry: define _TIF_ALLWORK_MASK flags explicitly")
    499934898fcd1 ("x86/entry/64: Use ENTRY() instead of ALIGN+GLOBAL for stub32_clone()")
    4ddb45db30851 ("x86/syscalls: Use COMPAT_SYSCALL_DEFINEx() macros for x86-only compat syscalls")
    5262f567987d3 ("io_uring: IORING_OP_TIMEOUT support")
    5ac9efa3c50d7 ("syscalls/core, syscalls/x86: Clean up compat syscall stub naming convention")
    5ea0727b163cb ("x86/syscalls: Check address limit on user-mode return")
    66f4e88cc69da ("x86/ioport: add ksys_ioperm() helper; remove in-kernel calls to sys_ioperm()")
    7a074e96dee62 ("aio: implement io_pgetevents")
    7c2178c1ff482 ("x86/syscalls: Use proper syscall definition for sys_ioperm()")
    a528d35e8bfcc ("statx: Add a system call to make enhanced file info available")
    ab0d1e85bfd0c ("fs/quota: use COMPAT_SYSCALL_DEFINE for sys32_quotactl()")
    af52201d99162 ("x86/entry: Do not special-case clone(2) in compat entry")
    afb94c9e0b413 ("livepatch/x86: add TIF_PATCH_PENDING thread flag")
    b411991e0ca88 ("x86/syscalls/32: Simplify $entry == $compat entries")
    b51d3cdf44d5c ("x86: remove compat_sys_x86_waitpid()")
    de0617e467171 ("io_uring: add support for marking commands as draining")
    dfe64506c01e5 ("x86/syscalls: Don't pointlessly reload the system call number")
    e145242ea0df6 ("syscalls/core, syscalls/x86: Clean up syscall stub naming convention")
    ebeb8c82ffaf9 ("syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32")
    fa697140f9a20 ("syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls")

v4.4.196: Failed to apply! Possible dependencies:
    05c17cedf85ba ("x86: Wire up restartable sequence system call")
    1e423bff959e4 ("x86/entry/64: Migrate the 64-bit syscall slow path to C")
    2b188cc1bb857 ("Add io_uring IO interface")
    302f5b260c322 ("x86/entry/64: Always run ptregs-using syscalls on the slow path")
    3e65654e3db6d ("x86/syscalls: Move compat syscall entry handling into syscalltbl.sh")
    46eabf06c04a6 ("x86/entry/64: Call all native slow-path syscalls with full pt-regs")
    5262f567987d3 ("io_uring: IORING_OP_TIMEOUT support")
    5ac9efa3c50d7 ("syscalls/core, syscalls/x86: Clean up compat syscall stub naming convention")
    7a074e96dee62 ("aio: implement io_pgetevents")
    95d97adb2bb85 ("x86/signal: Cleanup get_nr_restart_syscall()")
    97245d00585d8 ("x86/entry: Get rid of pt_regs_to_thread_info()")
    abfb9498ee132 ("x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()")
    c87a85177e7a7 ("x86/entry: Get rid of two-phase syscall entry work")
    cfcbadb49dabb ("x86/syscalls: Add syscall entry qualifiers")
    de0617e467171 ("io_uring: add support for marking commands as draining")
    dfe64506c01e5 ("x86/syscalls: Don't pointlessly reload the system call number")
    e145242ea0df6 ("syscalls/core, syscalls/x86: Clean up syscall stub naming convention")
    ebeb8c82ffaf9 ("syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32")
    fa697140f9a20 ("syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls")
    fba324744bfd2 ("x86/syscalls: Refactor syscalltbl.sh")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] io_uring: fix up O_NONBLOCK handling for sockets
  2019-10-17 15:24 [PATCH] io_uring: fix up O_NONBLOCK handling for sockets Jens Axboe
  2019-10-17 20:52 ` Sasha Levin
@ 2019-10-21  0:23 ` Jackie Liu
  2019-10-21 14:26   ` Jens Axboe
  1 sibling, 1 reply; 4+ messages in thread
From: Jackie Liu @ 2019-10-21  0:23 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block@vger.kernel.org, Hrvoje Zeba



> 2019年10月17日 23:24,Jens Axboe <axboe@kernel.dk> 写道:
> 
> We've got two issues with the non-regular file handling for non-blocking
> IO:
> 
> 1) We don't want to re-do a short read in full for a non-regular file,
>   as we can't just read the data again.
> 2) For non-regular files that don't support non-blocking IO attempts,
>   we need to punt to async context even if the file is opened as
>   non-blocking. Otherwise the caller always gets -EAGAIN.
> 
> Add two new request flags to handle these cases. One is just a cache
> of the inode S_ISREG() status, the other tells io_uring that we always
> need to punt this request to async context, even if REQ_F_NOWAIT is set.
> 
> Cc: stable@vger.kernel.org
> Reported-by: Hrvoje Zeba <zeba.hrvoje@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
> fs/io_uring.c | 55 +++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 38 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index d2cb277da2f4..a4ee5436cb61 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -322,6 +322,8 @@ struct io_kiocb {
> #define REQ_F_FAIL_LINK		256	/* fail rest of links */
> #define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
> #define REQ_F_TIMEOUT		1024	/* timeout request */
> +#define REQ_F_ISREG		2048	/* regular file */
> +#define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
> 	u64			user_data;
> 	u32			result;
> 	u32			sequence;
> @@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
> 	return ret;
> }
> 
> -static void kiocb_end_write(struct kiocb *kiocb)
> +static void kiocb_end_write(struct io_kiocb *req)
> {
> -	if (kiocb->ki_flags & IOCB_WRITE) {
> -		struct inode *inode = file_inode(kiocb->ki_filp);
> +	/*
> +	 * Tell lockdep we inherited freeze protection from submission
> +	 * thread.
> +	 */
> +	if (req->flags & REQ_F_ISREG) {
> +		struct inode *inode = file_inode(req->file);
> 
> -		/*
> -		 * Tell lockdep we inherited freeze protection from submission
> -		 * thread.
> -		 */
> -		if (S_ISREG(inode->i_mode))
> -			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
> -		file_end_write(kiocb->ki_filp);
> +		__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
> 	}
> +	file_end_write(req->file);
> }
> 
> static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
> {
> 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
> 
> -	kiocb_end_write(kiocb);
> +	if (kiocb->ki_flags & IOCB_WRITE)
> +		kiocb_end_write(req);
> 
> 	if ((req->flags & REQ_F_LINK) && res != req->result)
> 		req->flags |= REQ_F_FAIL_LINK;
> @@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
> {
> 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
> 
> -	kiocb_end_write(kiocb);
> +	if (kiocb->ki_flags & IOCB_WRITE)
> +		kiocb_end_write(req);
> 
> 	if ((req->flags & REQ_F_LINK) && res != req->result)
> 		req->flags |= REQ_F_FAIL_LINK;
> @@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
> 	if (!req->file)
> 		return -EBADF;
> 
> -	if (force_nonblock && !io_file_supports_async(req->file))
> +	if (S_ISREG(file_inode(req->file)->i_mode))
> +		req->flags |= REQ_F_ISREG;
> +
> +	/*
> +	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
> +	 * we know to async punt it even if it was opened O_NONBLOCK
> +	 */
> +	if (force_nonblock && !io_file_supports_async(req->file)) {
> 		force_nonblock = false;
> +		req->flags |= REQ_F_MUST_PUNT;
> +	}

Hello Jens, that is your new version:

+	/*
+	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
+	 * we know to async punt it even if it was opened O_NONBLOCK
+	 */
+	if (force_nonblock && !io_file_supports_async(req->file)) {
+		req->flags |= REQ_F_MUST_PUNT;
+		return -EAGAIN;
+	}

So, if req->file don't support async, we always return EAGAIN immediately.
 

> 
> 	kiocb->ki_pos = READ_ONCE(sqe->off);
> 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
> @@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
> 		return ret;
> 
> 	/* don't allow async punt if RWF_NOWAIT was requested */
> -	if (kiocb->ki_flags & IOCB_NOWAIT)
> +	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
> +	    (req->file->f_flags & O_NONBLOCK))

I think if we return -EAGAIN immediately and use the work queue to execute this context, 
this is unnecessary.

> 		req->flags |= REQ_F_NOWAIT;
> 
> 	if (force_nonblock)
> @@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
> 		 * need async punt anyway, so it's more efficient to do it
> 		 * here.
> 		 */
> -		if (force_nonblock && ret2 > 0 && ret2 < read_size)
> +		if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
> +		    (req->flags & REQ_F_ISREG) &&
> +		    ret2 > 0 && ret2 < read_size)
> 			ret2 = -EAGAIN;

This is also unnecessary because force_nonblock is always false.

--
Jackie Liu




^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] io_uring: fix up O_NONBLOCK handling for sockets
  2019-10-21  0:23 ` Jackie Liu
@ 2019-10-21 14:26   ` Jens Axboe
  0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2019-10-21 14:26 UTC (permalink / raw)
  To: Jackie Liu; +Cc: linux-block@vger.kernel.org, Hrvoje Zeba

On 10/20/19 6:23 PM, Jackie Liu wrote:
> 
> 
>> 2019年10月17日 23:24,Jens Axboe <axboe@kernel.dk> 写道:
>>
>> We've got two issues with the non-regular file handling for non-blocking
>> IO:
>>
>> 1) We don't want to re-do a short read in full for a non-regular file,
>>    as we can't just read the data again.
>> 2) For non-regular files that don't support non-blocking IO attempts,
>>    we need to punt to async context even if the file is opened as
>>    non-blocking. Otherwise the caller always gets -EAGAIN.
>>
>> Add two new request flags to handle these cases. One is just a cache
>> of the inode S_ISREG() status, the other tells io_uring that we always
>> need to punt this request to async context, even if REQ_F_NOWAIT is set.
>>
>> Cc: stable@vger.kernel.org
>> Reported-by: Hrvoje Zeba <zeba.hrvoje@gmail.com>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>> fs/io_uring.c | 55 +++++++++++++++++++++++++++++++++++----------------
>> 1 file changed, 38 insertions(+), 17 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index d2cb277da2f4..a4ee5436cb61 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -322,6 +322,8 @@ struct io_kiocb {
>> #define REQ_F_FAIL_LINK		256	/* fail rest of links */
>> #define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
>> #define REQ_F_TIMEOUT		1024	/* timeout request */
>> +#define REQ_F_ISREG		2048	/* regular file */
>> +#define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
>> 	u64			user_data;
>> 	u32			result;
>> 	u32			sequence;
>> @@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
>> 	return ret;
>> }
>>
>> -static void kiocb_end_write(struct kiocb *kiocb)
>> +static void kiocb_end_write(struct io_kiocb *req)
>> {
>> -	if (kiocb->ki_flags & IOCB_WRITE) {
>> -		struct inode *inode = file_inode(kiocb->ki_filp);
>> +	/*
>> +	 * Tell lockdep we inherited freeze protection from submission
>> +	 * thread.
>> +	 */
>> +	if (req->flags & REQ_F_ISREG) {
>> +		struct inode *inode = file_inode(req->file);
>>
>> -		/*
>> -		 * Tell lockdep we inherited freeze protection from submission
>> -		 * thread.
>> -		 */
>> -		if (S_ISREG(inode->i_mode))
>> -			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
>> -		file_end_write(kiocb->ki_filp);
>> +		__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
>> 	}
>> +	file_end_write(req->file);
>> }
>>
>> static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
>> {
>> 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
>>
>> -	kiocb_end_write(kiocb);
>> +	if (kiocb->ki_flags & IOCB_WRITE)
>> +		kiocb_end_write(req);
>>
>> 	if ((req->flags & REQ_F_LINK) && res != req->result)
>> 		req->flags |= REQ_F_FAIL_LINK;
>> @@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
>> {
>> 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
>>
>> -	kiocb_end_write(kiocb);
>> +	if (kiocb->ki_flags & IOCB_WRITE)
>> +		kiocb_end_write(req);
>>
>> 	if ((req->flags & REQ_F_LINK) && res != req->result)
>> 		req->flags |= REQ_F_FAIL_LINK;
>> @@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
>> 	if (!req->file)
>> 		return -EBADF;
>>
>> -	if (force_nonblock && !io_file_supports_async(req->file))
>> +	if (S_ISREG(file_inode(req->file)->i_mode))
>> +		req->flags |= REQ_F_ISREG;
>> +
>> +	/*
>> +	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
>> +	 * we know to async punt it even if it was opened O_NONBLOCK
>> +	 */
>> +	if (force_nonblock && !io_file_supports_async(req->file)) {
>> 		force_nonblock = false;
>> +		req->flags |= REQ_F_MUST_PUNT;
>> +	}
> 
> Hello Jens. that is your new version.
> 
> +	/*
> +	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
> +	 * we know to async punt it even if it was opened O_NONBLOCK
> +	 */
> +	if (force_nonblock && !io_file_supports_async(req->file)) {
> +		req->flags |= REQ_F_MUST_PUNT;
> +		return -EAGAIN;
> +	}
> 
> So, if req->file don't support async, we always return EAGAIN immediately.

Right

>> 	kiocb->ki_pos = READ_ONCE(sqe->off);
>> 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
>> @@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
>> 		return ret;
>>
>> 	/* don't allow async punt if RWF_NOWAIT was requested */
>> -	if (kiocb->ki_flags & IOCB_NOWAIT)
>> +	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
>> +	    (req->file->f_flags & O_NONBLOCK))
> 
> I think if we return -EAGAIN immediately, and using the work queue to execute this context,
> this is unnecessary.
> 
>> 		req->flags |= REQ_F_NOWAIT;
>>
>> 	if (force_nonblock)
>> @@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
>> 		 * need async punt anyway, so it's more efficient to do it
>> 		 * here.
>> 		 */
>> -		if (force_nonblock && ret2 > 0 && ret2 < read_size)
>> +		if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
>> +		    (req->flags & REQ_F_ISREG) &&
>> +		    ret2 > 0 && ret2 < read_size)
>> 			ret2 = -EAGAIN;
> 
> This is also unnecessary because force_nonblock is always false.

I totally agree, with the above direct return change, these aren't
strictly necessary. But I kept them to be on the safe side.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-10-21 14:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-10-17 15:24 [PATCH] io_uring: fix up O_NONBLOCK handling for sockets Jens Axboe
2019-10-17 20:52 ` Sasha Levin
2019-10-21  0:23 ` Jackie Liu
2019-10-21 14:26   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).