[PATCH 01/11] block: read-ahead submission should imply no-wait as well
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

As read-ahead is opportunistic, don't block for request allocation.
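
For illustration (not part of the patch), the effect on a submitter: a bio
tagged as read-ahead now implies no-wait semantics, so if request allocation
would block, the bio instead completes with BLK_STS_AGAIN. A minimal sketch:

	bio->bi_opf = REQ_OP_READ | REQ_RAHEAD;	/* REQ_NOWAIT now implied */
	submit_bio(bio);
	/* under allocation pressure the bio ends with BLK_STS_AGAIN
	 * instead of the submitter sleeping */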
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index ccb895f911b1..c296463c15eb 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -374,7 +374,8 @@ enum req_flag_bits {
 #define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
 #define REQ_FUA			(1ULL << __REQ_FUA)
 #define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
-#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
+#define REQ_RAHEAD		\
+	((1ULL << __REQ_RAHEAD) | (1ULL << __REQ_NOWAIT))
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
 #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
-- 
2.26.2

[PATCH 02/11] mm: allow read-ahead with IOCB_NOWAIT set
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

Read-ahead shouldn't block, so allow it to be issued even if IOCB_NOWAIT
is set in the kiocb.
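
The resulting flow in generic_file_buffered_read() (simplified sketch of
the code after this change):

	page = find_get_page(mapping, index);
	if (!page) {
		/* read-ahead won't block, so start it even under IOCB_NOWAIT */
		page_cache_sync_readahead(mapping, ra, filp,
					  index, last_index - index);
		page = find_get_page(mapping, index);
		if (unlikely(page == NULL))
			goto no_cached_page;
	}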
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 mm/filemap.c | 2 --
 1 file changed, 2 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 23a051a7ef0f..80747f1377d5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2031,8 +2031,6 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
 		page = find_get_page(mapping, index);
 		if (!page) {
-			if (iocb->ki_flags & IOCB_NOWAIT)
-				goto would_block;
 			page_cache_sync_readahead(mapping,
 					ra, filp,
 					index, last_index - index);
-- 
2.26.2

[PATCH 03/11] mm: add support for async page locking
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

Normally waiting for a page to become unlocked, or locking the page,
requires waiting for IO to complete. Add support for lock_page_async()
and wait_on_page_locked_async(), which are callback based instead. This
allows a caller to get notified when a page becomes unlocked, rather
than wait for it.
The iocb->private field is used to pass in the data needed for this to
happen. struct wait_page_key is made public, and struct wait_page_async
is added as the interface between the caller and the core.
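
A hypothetical caller (my_page_cb and my_data are illustrative names, not
part of this series) would wire it up roughly like this:

	static int my_page_cb(struct wait_queue_entry *wait, unsigned mode,
			      int sync, void *arg)
	{
		struct wait_page_key *key = arg;
		struct wait_page_async *wpa;

		wpa = container_of(wait, struct wait_page_async, wait);
		if (wpa->key.page != key->page ||
		    wpa->key.bit_nr != key->bit_nr)
			return 0;	/* wakeup was for another page/bit */
		/* page is now unlocked: schedule the retry from here */
		return 1;
	}

	wpa.wait.func = my_page_cb;
	wpa.wait.private = my_data;
	iocb->private = &wpa;
	iocb->ki_flags |= IOCB_WAITQ;
	/* lock_page_async(page, &wpa) now returns -EIOCBQUEUED instead
	 * of sleeping while the page is locked */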
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h      |  2 ++
 include/linux/pagemap.h | 21 ++++++++++++++++
 mm/filemap.c            | 56 +++++++++++++++++++++++++++++++++++------
 3 files changed, 72 insertions(+), 7 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e84d823c6a8..82b989695ab9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -314,6 +314,8 @@ enum rw_hint {
 #define IOCB_SYNC		(1 << 5)
 #define IOCB_WRITE		(1 << 6)
 #define IOCB_NOWAIT		(1 << 7)
+/* iocb->private holds wait_page_async struct */
+#define IOCB_WAITQ		(1 << 8)
 
 struct kiocb {
 	struct file		*ki_filp;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a8f7bd8ea1c6..e260bcd071e4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -456,8 +456,21 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 	return pgoff;
 }
 
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
+struct wait_page_key {
+	struct page *page;
+	int bit_nr;
+	int page_match;
+};
+
+struct wait_page_async {
+	struct wait_queue_entry wait;
+	struct wait_page_key key;
+};
+
 extern void __lock_page(struct page *page);
 extern int __lock_page_killable(struct page *page);
+extern int __lock_page_async(struct page *page, struct wait_page_async *wait);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				unsigned int flags);
 extern void unlock_page(struct page *page);
@@ -494,6 +507,14 @@ static inline int lock_page_killable(struct page *page)
 	return 0;
 }
 
+static inline int lock_page_async(struct page *page,
+				  struct wait_page_async *wait)
+{
+	if (!trylock_page(page))
+		return __lock_page_async(page, wait);
+	return 0;
+}
+
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
diff --git a/mm/filemap.c b/mm/filemap.c
index 80747f1377d5..a01daafd49fd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -990,13 +990,6 @@ void __init pagecache_init(void)
 	page_writeback_init();
 }
 
-/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
-struct wait_page_key {
-	struct page *page;
-	int bit_nr;
-	int page_match;
-};
-
 struct wait_page_queue {
 	struct page *page;
 	int bit_nr;
@@ -1210,6 +1203,50 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
+static int __wait_on_page_locked_async(struct page *page,
+				       struct wait_page_async *wait, bool set)
+{
+	struct wait_queue_head *q = page_waitqueue(page);
+	int ret = 0;
+
+	wait->key.page = page;
+	wait->key.bit_nr = PG_locked;
+
+	spin_lock_irq(&q->lock);
+	if (set)
+		ret = !trylock_page(page);
+	else
+		ret = PageLocked(page);
+	if (ret) {
+		__add_wait_queue_entry_tail(q, &wait->wait);
+		SetPageWaiters(page);
+		if (set)
+			ret = !trylock_page(page);
+		else
+			ret = PageLocked(page);
+		/*
+		 * If we were successful now, we know we're still on the
+		 * waitqueue as we're still under the lock. This means it's
+		 * safe to remove and return success, we know the callback
+		 * isn't going to trigger.
+		 */
+		if (!ret)
+			__remove_wait_queue(q, &wait->wait);
+		else
+			ret = -EIOCBQUEUED;
+	}
+	spin_unlock_irq(&q->lock);
+	return ret;
+}
+
+static int wait_on_page_locked_async(struct page *page,
+				     struct wait_page_async *wait)
+{
+	if (!PageLocked(page))
+		return 0;
+	return __wait_on_page_locked_async(compound_head(page), wait, false);
+}
+
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -1372,6 +1409,11 @@ int __lock_page_killable(struct page *__page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+int __lock_page_async(struct page *page, struct wait_page_async *wait)
+{
+	return __wait_on_page_locked_async(page, wait, true);
+}
+
 /*
  * Return values:
  * 1 - page is locked; mmap_sem is still held.
-- 
2.26.2

[PATCH 04/11] mm: support async buffered reads in generic_file_buffered_read()
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

Use the async page locking infrastructure if IOCB_WAITQ is set in the
passed-in kiocb. The caller must then expect an -EIOCBQUEUED return value,
which means that IO has been started but is not yet done. This is similar
to how O_DIRECT signals the same condition. Once the caller's callback
fires on IO completion, the caller must retry the operation.
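
From the caller's point of view (sketch, assuming the IOCB_WAITQ setup from
the previous patch is in place):

	kiocb->ki_flags |= IOCB_WAITQ;
	kiocb->private = &wpa;		/* struct wait_page_async */
	ret = call_read_iter(file, kiocb, &iter);
	if (ret == -EIOCBQUEUED) {
		/* a page wasn't ready; the unlock callback will fire,
		 * and the read must be reissued from there */
	}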
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 mm/filemap.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index a01daafd49fd..b49836ff0fdc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2086,17 +2086,25 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page)) {
-			if (iocb->ki_flags & IOCB_NOWAIT) {
-				put_page(page);
-				goto would_block;
-			}
-
 			/*
 			 * See comment in do_read_cache_page on why
 			 * wait_on_page_locked is used to avoid unnecessarily
 			 * serialisations and why it's safe.
 			 */
-			error = wait_on_page_locked_killable(page);
+			if (iocb->ki_flags & IOCB_WAITQ) {
+				if (written) {
+					put_page(page);
+					goto out;
+				}
+				error = wait_on_page_locked_async(page,
+								iocb->private);
+			} else {
+				if (iocb->ki_flags & IOCB_NOWAIT) {
+					put_page(page);
+					goto would_block;
+				}
+				error = wait_on_page_locked_killable(page);
+			}
 			if (unlikely(error))
 				goto readpage_error;
 			if (PageUptodate(page))
@@ -2184,7 +2192,10 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
-		error = lock_page_killable(page);
+		if (iocb->ki_flags & IOCB_WAITQ)
+			error = lock_page_async(page, iocb->private);
+		else
+			error = lock_page_killable(page);
 		if (unlikely(error))
 			goto readpage_error;
 
-- 
2.26.2

[PATCH 05/11] fs: add FMODE_BUF_RASYNC
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

If set, this indicates that the file system supports IOCB_WAITQ for
buffered reads.
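
Callers that want callback-based retry can then gate on the flag (sketch):

	if (!(file->f_mode & FMODE_BUF_RASYNC))
		return -EOPNOTSUPP;	/* fall back to sync or thread offload */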
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 82b989695ab9..0ef5f5973b1c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File does not contribute to nr_files count */
 #define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)
 
+/* File supports async buffered reads */
+#define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
-- 
2.26.2

[PATCH 06/11] ext4: flag as supporting buffered async reads
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/ext4/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0d624250a62b..9f7d9bf427b4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -826,7 +826,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 			return ret;
 	}
 
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return dquot_file_open(inode, filp);
 }
 
-- 
2.26.2

[PATCH 07/11] block: flag block devices as supporting IOCB_WAITQ
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 86e2a7134513..ec8dccc81b65 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1851,7 +1851,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	 */
 	filp->f_flags |= O_LARGEFILE;
 
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 
 	if (filp->f_flags & O_NDELAY)
 		filp->f_mode |= FMODE_NDELAY;
-- 
2.26.2

[PATCH 08/11] xfs: flag files as supporting buffered async reads
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

XFS uses generic_file_read_iter(), which already supports this.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/xfs/xfs_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 4b8bdecc3863..97f44fbf17f2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1080,7 +1080,7 @@ xfs_file_open(
 		return -EFBIG;
 	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
 		return -EIO;
-	file->f_mode |= FMODE_NOWAIT;
+	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return 0;
 }
 
-- 
2.26.2

[PATCH 09/11] btrfs: flag files as supporting buffered async reads
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

btrfs uses generic_file_read_iter(), which already supports this.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/btrfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 719e68ab552c..c933b6a1b4a8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3480,7 +3480,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 
 static int btrfs_file_open(struct inode *inode, struct file *filp)
 {
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return generic_file_open(inode, filp);
 }
 
-- 
2.26.2

[PATCH 10/11] mm: add kiocb_wait_page_async_init() helper
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

Check whether the file supports async buffered reads (FMODE_BUF_RASYNC)
and initialize the values we need. The caller passes in a 'data' pointer,
if any, along with the callback function to be used.
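
Hedged usage sketch (my_cb and my_data are placeholder names):

	struct wait_page_async wpa;
	int ret;

	ret = kiocb_wait_page_async_init(kiocb, &wpa, my_cb, my_data);
	if (ret)
		return ret;	/* -EOPNOTSUPP: no FMODE_BUF_RASYNC, punt */
	/* kiocb now has IOCB_WAITQ set and ->private pointing at wpa */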
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/pagemap.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e260bcd071e4..21ced353310a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -468,6 +468,24 @@ struct wait_page_async {
 	struct wait_page_key key;
 };
 
+static inline int kiocb_wait_page_async_init(struct kiocb *kiocb,
+					     struct wait_page_async *wait,
+					     wait_queue_func_t func,
+					     void *data)
+{
+	if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
+		wait->wait.func = func;
+		wait->wait.private = data;
+		wait->wait.flags = 0;
+		INIT_LIST_HEAD(&wait->wait.entry);
+		kiocb->ki_flags |= IOCB_WAITQ;
+		kiocb->private = wait;
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
 extern void __lock_page(struct page *page);
 extern int __lock_page_killable(struct page *page);
 extern int __lock_page_async(struct page *page, struct wait_page_async *wait);
-- 
2.26.2

[PATCH 11/11] io_uring: support true async buffered reads, if file provides it
From: Jens Axboe @ 2020-05-23  1:50 UTC
To: io-uring; +Cc: linux-fsdevel, linux-kernel, linux-mm, Jens Axboe

If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
the buffered read to an io-wq worker. Instead we can rely on page
unlocking callbacks to support retry-based async IO. This is a lot more
efficient than async thread offload.

The retry is done similarly to how we handle poll-based retry: from the
unlock callback, we simply queue the retry to a task_work-based handler.
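
The sequence, roughly (sketch of the flow implemented below):

	/*
	 * 1) io_read() gets -EAGAIN; io_rw_should_retry() arms IOCB_WAITQ
	 *    and the read is retried inline
	 * 2) a page still isn't ready, so the read returns -EIOCBQUEUED
	 * 3) the page unlock wakes io_async_buf_func()
	 * 4) it queues task_work; io_async_buf_retry() then reissues the
	 *    request via __io_queue_sqe()
	 */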
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e95481c552ff..9eeae10db648 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -498,6 +498,8 @@ struct io_async_rw {
 	struct iovec			*iov;
 	ssize_t				nr_segs;
 	ssize_t				size;
+	struct wait_page_async		wait;
+	struct callback_head		task_work;
 };
 
 struct io_async_ctx {
@@ -2568,6 +2570,102 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
+static void io_async_buf_cancel(struct callback_head *cb)
+{
+	struct io_async_rw *rw;
+	struct io_ring_ctx *ctx;
+	struct io_kiocb *req;
+
+	rw = container_of(cb, struct io_async_rw, task_work);
+	req = rw->wait.wait.private;
+	ctx = req->ctx;
+
+	spin_lock_irq(&ctx->completion_lock);
+	io_cqring_fill_event(req, -ECANCELED);
+	io_commit_cqring(ctx);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_ev_posted(ctx);
+	req_set_fail_links(req);
+	io_double_put_req(req);
+}
+
+static void io_async_buf_retry(struct callback_head *cb)
+{
+	struct io_async_rw *rw;
+	struct io_ring_ctx *ctx;
+	struct io_kiocb *req;
+
+	rw = container_of(cb, struct io_async_rw, task_work);
+	req = rw->wait.wait.private;
+	ctx = req->ctx;
+
+	__set_current_state(TASK_RUNNING);
+	mutex_lock(&ctx->uring_lock);
+	__io_queue_sqe(req, NULL);
+	mutex_unlock(&ctx->uring_lock);
+}
+
+static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
+			     int sync, void *arg)
+{
+	struct wait_page_async *wp;
+	struct io_kiocb *req = wait->private;
+	struct io_async_rw *rw = &req->io->rw;
+	struct wait_page_key *key = arg;
+	struct task_struct *tsk;
+	int ret;
+
+	wp = container_of(wait, struct wait_page_async, wait);
+	if (wp->key.page != key->page)
+		return 0;
+	key->page_match = 1;
+	if (wp->key.bit_nr != key->bit_nr)
+		return 0;
+	if (test_bit(PG_locked, &key->page->flags))
+		return -1;
+
+	list_del_init(&wait->entry);
+
+	init_task_work(&rw->task_work, io_async_buf_retry);
+	/* submit ref gets dropped, acquire a new one */
+	refcount_inc(&req->refs);
+	tsk = req->task;
+	ret = task_work_add(tsk, &rw->task_work, true);
+	if (unlikely(ret)) {
+		/* queue just for cancelation */
+		init_task_work(&rw->task_work, io_async_buf_cancel);
+		tsk = io_wq_get_task(req->ctx->io_wq);
+		task_work_add(tsk, &rw->task_work, true);
+	}
+	wake_up_process(tsk);
+	return 1;
+}
+
+static bool io_rw_should_retry(struct io_kiocb *req)
+{
+	struct kiocb *kiocb = &req->rw.kiocb;
+	int ret;
+
+	/* already tried, or we're doing O_DIRECT */
+	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+		return false;
+	/*
+	 * just use poll if we can, and don't attempt if the fs doesn't
+	 * support callback based unlocks
+	 */
+	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+		return false;
+
+	ret = kiocb_wait_page_async_init(kiocb, &req->io->rw.wait,
+						io_async_buf_func, req);
+	if (ret)
+		return false;
+	get_task_struct(current);
+	req->task = current;
+	return true;
+}
+
 static int io_read(struct io_kiocb *req, bool force_nonblock)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -2601,6 +2699,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 	if (!ret) {
 		ssize_t ret2;
 
+retry:
 		if (req->file->f_op->read_iter)
 			ret2 = call_read_iter(req->file, kiocb, &iter);
 		else
@@ -2619,6 +2718,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 			if (!(req->flags & REQ_F_NOWAIT) &&
 			    !file_can_poll(req->file))
 				req->flags |= REQ_F_MUST_PUNT;
+			if (io_rw_should_retry(req))
+				goto retry;
+			kiocb->ki_flags &= ~IOCB_WAITQ;
 			return -EAGAIN;
 		}
 	}
-- 
2.26.2