* [PATCH 1/8] nowait aio: Introduce IOCB_RW_FLAG_NOWAIT
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 2/8] nowait aio: Return if cannot get hold of i_rwsem Goldwyn Rodrigues
` (6 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
This flag tells the kernel to bail out if an AIO request would block
for reasons such as file allocation or a triggered writeback,
or would block while allocating requests while performing
direct I/O.
Unfortunately, aio_flags is not checked for validity. If we
add the flags to aio_flags, it would break existing applications
which have it set to anything besides zero or IOCB_FLAG_RESFD.
So, we are using aio_reserved1 and renaming it to aio_rw_flags.
IOCB_RW_FLAG_NOWAIT is translated to IOCB_NOWAIT for
iocb->ki_flags.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/aio.c | 10 +++++++++-
include/linux/fs.h | 1 +
include/uapi/linux/aio_abi.h | 9 ++++++++-
3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index f52d925..41409ac 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,11 +1541,16 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
ssize_t ret;
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
+ if (unlikely(iocb->aio_rw_flags & ~IOCB_RW_FLAG_NOWAIT)) {
+ pr_debug("EINVAL: aio_rw_flags set with incompatible flags\n");
+ return -EINVAL;
+ }
+
/* prevent overflows */
if (unlikely(
(iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
@@ -1586,6 +1591,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_flags |= IOCB_EVENTFD;
}
+ if (iocb->aio_rw_flags & IOCB_RW_FLAG_NOWAIT)
+ req->common.ki_flags |= IOCB_NOWAIT;
+
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
pr_debug("EFAULT: aio_key\n");
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7251f7b..e8d9346 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -270,6 +270,7 @@ struct writeback_control;
#define IOCB_DSYNC (1 << 4)
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
+#define IOCB_NOWAIT (1 << 7)
struct kiocb {
struct file *ki_filp;
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f..6d98cbe 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -54,6 +54,13 @@ enum {
*/
#define IOCB_FLAG_RESFD (1 << 0)
+/*
+ * Flags for aio_rw_flags member of "struct iocb".
+ * IOCB_RW_FLAG_NOWAIT - Set if the user wants the iocb to fail if it
+ * would block for operations such as disk allocation.
+ */
+#define IOCB_RW_FLAG_NOWAIT (1 << 1)
+
/* read() from /dev/aio returns these structures. */
struct io_event {
__u64 data; /* the data field from the iocb */
@@ -79,7 +86,7 @@ struct io_event {
struct iocb {
/* these are internal to the kernel/libc. */
__u64 aio_data; /* data to be returned in event's data */
- __u32 PADDED(aio_key, aio_reserved1);
+ __u32 PADDED(aio_key, aio_rw_flags);
/* the kernel sets aio_key to the req # */
/* common fields */
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 2/8] nowait aio: Return if cannot get hold of i_rwsem
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 1/8] nowait aio: Introduce IOCB_RW_FLAG_NOWAIT Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 3/8] nowait aio: return if direct write will trigger writeback Goldwyn Rodrigues
` (5 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
A failure to lock i_rwsem would mean there is I/O being performed
by another thread. So, let's bail.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
mm/filemap.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 1694623..e08f3b9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2982,7 +2982,12 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file->f_mapping->host;
ssize_t ret;
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ /* Don't sleep on inode rwsem */
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
ret = generic_write_checks(iocb, from);
if (ret > 0)
ret = __generic_file_write_iter(iocb, from);
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 3/8] nowait aio: return if direct write will trigger writeback
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 1/8] nowait aio: Introduce IOCB_RW_FLAG_NOWAIT Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 2/8] nowait aio: Return if cannot get hold of i_rwsem Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
2017-03-16 13:08 ` Matthew Wilcox
2017-03-16 13:20 ` Matthew Wilcox
2017-03-15 21:51 ` [PATCH 4/8] nowait-aio: Introduce IOMAP_NOWAIT Goldwyn Rodrigues
` (4 subsequent siblings)
7 siblings, 2 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.
This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.
Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
include/linux/fs.h | 2 ++
mm/filemap.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e8d9346..4a30e8f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2514,6 +2514,8 @@ extern int filemap_fdatawait(struct address_space *);
extern void filemap_fdatawait_keep_errors(struct address_space *);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+ loff_t lend);
extern int filemap_write_and_wait(struct address_space *mapping);
extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index e08f3b9..c020e23 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,39 @@ int filemap_flush(struct address_space *mapping)
}
EXPORT_SYMBOL(filemap_flush);
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping: address space structure to check
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
+{
+ pgoff_t index = start_byte >> PAGE_SHIFT;
+ pgoff_t end = end_byte >> PAGE_SHIFT;
+ struct pagevec pvec;
+ int ret;
+
+ if (end_byte < start_byte)
+ return 0;
+
+ if (mapping->nrpages == 0)
+ return 0;
+
+ pagevec_init(&pvec, 0);
+ ret = pagevec_lookup(&pvec, mapping, index, 1);
+ if (!ret)
+ return 0;
+ ret = (pvec.pages[0]->index <= end);
+ pagevec_release(&pvec);
+ return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
static int __filemap_fdatawait_range(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
@@ -2640,6 +2673,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
pos = iocb->ki_pos;
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+ return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2709,9 +2745,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
- written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
- if (written)
- goto out;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* If there are pages to writeback, return */
+ if (filemap_range_has_page(inode->i_mapping, pos,
+ pos + iov_iter_count(from)))
+ return -EAGAIN;
+ } else {
+ written = filemap_write_and_wait_range(mapping, pos,
+ pos + write_len - 1);
+ if (written)
+ goto out;
+ }
/*
* After a write we want buffered reads to be sure to go to disk to get
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback
2017-03-15 21:51 ` [PATCH 3/8] nowait aio: return if direct write will trigger writeback Goldwyn Rodrigues
@ 2017-03-16 13:08 ` Matthew Wilcox
2017-03-16 13:46 ` Goldwyn Rodrigues
2017-03-16 13:20 ` Matthew Wilcox
1 sibling, 1 reply; 21+ messages in thread
From: Matthew Wilcox @ 2017-03-16 13:08 UTC (permalink / raw)
To: Goldwyn Rodrigues
Cc: linux-fsdevel, jack, hch, linux-block, linux-btrfs, linux-ext4,
linux-xfs, sagi, avi, axboe, linux-api, Goldwyn Rodrigues
On Wed, Mar 15, 2017 at 04:51:02PM -0500, Goldwyn Rodrigues wrote:
> This introduces a new function filemap_range_has_page() which
> returns true if the file's mapping has a page within the range
> mentioned.
I thought you were going to replace this patch with one that starts
writeback for these pages but does not wait for them?
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback
2017-03-16 13:08 ` Matthew Wilcox
@ 2017-03-16 13:46 ` Goldwyn Rodrigues
0 siblings, 0 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-16 13:46 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-fsdevel, jack, hch, linux-block, linux-btrfs, linux-ext4,
linux-xfs, sagi, avi, axboe, linux-api, Goldwyn Rodrigues
On 03/16/2017 08:08 AM, Matthew Wilcox wrote:
> On Wed, Mar 15, 2017 at 04:51:02PM -0500, Goldwyn Rodrigues wrote:
>> This introduces a new function filemap_range_has_page() which
>> returns true if the file's mapping has a page within the range
>> mentioned.
>
> I thought you were going to replace this patch with one that starts
> writeback for these pages but does not wait for them?
>
As mentioned by Jan, flags to filemap_write_and_wait_range are
unnecessarily complicated. AIO-DIO API users who care about performance
are usually careful with page writes/evictions. As a fallback, they can
(and should) go the wait route (without IOCB_RW_FLAG_NOWAIT).
Finally, my take on this is that we don't want to perform tasks for a
following system call, which may or may not immediately follow the
current one. May not, because an application (DB) will offload the task
from the CPU thread to the I/O thread in case of -EAGAIN. A system call
should be complete in itself (and do the minimum, what is asked).
--
Goldwyn
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback
2017-03-15 21:51 ` [PATCH 3/8] nowait aio: return if direct write will trigger writeback Goldwyn Rodrigues
2017-03-16 13:08 ` Matthew Wilcox
@ 2017-03-16 13:20 ` Matthew Wilcox
[not found] ` <20170316132052.GG4033-PfSpb0PWhxZc2C7mugBRk2EX/6BAtgUQ@public.gmane.org>
1 sibling, 1 reply; 21+ messages in thread
From: Matthew Wilcox @ 2017-03-16 13:20 UTC (permalink / raw)
To: Goldwyn Rodrigues
Cc: linux-fsdevel, jack, hch, linux-block, linux-btrfs, linux-ext4,
linux-xfs, sagi, avi, axboe, linux-api, Goldwyn Rodrigues
On Wed, Mar 15, 2017 at 04:51:02PM -0500, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>
> Find out if the write will trigger a wait due to writeback. If yes,
> return -EAGAIN.
>
> This introduces a new function filemap_range_has_page() which
> returns true if the file's mapping has a page within the range
> mentioned.
>
> Return -EINVAL for buffered AIO: there are multiple causes of
> delay such as page locks, dirty throttling logic, page loading
> from disk etc. which cannot be taken care of.
Also, this patch only touches the write path; we have a similar call to
write_and_wait_range() in generic_file_read_iter().
Actually, why do we even have that? Why can't we satisfy an O_DIRECT
read from the cache?
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 4/8] nowait-aio: Introduce IOMAP_NOWAIT
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
` (2 preceding siblings ...)
2017-03-15 21:51 ` [PATCH 3/8] nowait aio: return if direct write will trigger writeback Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 5/8] nowait aio: return on congested block device Goldwyn Rodrigues
` (3 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
IOCB_NOWAIT translates to IOMAP_NOWAIT for iomaps.
This is used by XFS in the XFS patch.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/iomap.c | 2 ++
include/linux/iomap.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/fs/iomap.c b/fs/iomap.c
index 141c3cd..d1c8175 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -885,6 +885,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
} else {
dio->flags |= IOMAP_DIO_WRITE;
flags |= IOMAP_WRITE;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ flags |= IOMAP_NOWAIT;
}
if (mapping->nrpages) {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 7291810..53f6af8 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -51,6 +51,7 @@ struct iomap {
#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
#define IOMAP_FAULT (1 << 3) /* mapping for page fault */
#define IOMAP_DIRECT (1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT (1 << 5) /* Don't wait for writeback */
struct iomap_ops {
/*
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 5/8] nowait aio: return on congested block device
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
` (3 preceding siblings ...)
2017-03-15 21:51 ` [PATCH 4/8] nowait-aio: Introduce IOMAP_NOWAIT Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
[not found] ` <20170315215107.5628-6-rgoldwyn-l3A5Bk7waGM@public.gmane.org>
2017-03-15 21:51 ` [PATCH 6/8] nowait aio: ext4 Goldwyn Rodrigues
` (2 subsequent siblings)
7 siblings, 1 reply; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
A new flag BIO_NOWAIT is introduced to identify bios
originating from an iocb with IOCB_NOWAIT. This flag indicates
that the submission should return immediately if a request cannot
be made, instead of retrying.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
block/blk-core.c | 12 ++++++++++--
block/blk-mq-sched.c | 3 +++
block/blk-mq.c | 4 ++++
fs/direct-io.c | 11 +++++++++--
include/linux/bio.h | 6 ++++++
include/linux/blk_types.h | 1 +
6 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 0eeb99e..2e5cba2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1232,6 +1232,11 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
if (!IS_ERR(rq))
return rq;
+ if (bio && bio_flagged(bio, BIO_NOWAIT)) {
+ blk_put_rl(rl);
+ return ERR_PTR(-EAGAIN);
+ }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return rq;
@@ -2014,7 +2019,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- if (likely(blk_queue_enter(q, false) == 0)) {
+ if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 0)) {
struct bio_list hold;
struct bio_list lower, same;
@@ -2040,7 +2045,10 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(&bio_list_on_stack, &same);
bio_list_merge(&bio_list_on_stack, &hold);
} else {
- bio_io_error(bio);
+ if (unlikely(bio_flagged(bio, BIO_NOWAIT)))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
}
bio = bio_list_pop(current->bio_list);
} while (bio);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 09af8ff..40e78b5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+ if (likely(bio) && bio_flagged(bio, BIO_NOWAIT))
+ data->flags |= BLK_MQ_REQ_NOWAIT;
+
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 159187a..942ce8c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1518,6 +1518,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+ if (bio && bio_flagged(bio, BIO_NOWAIT))
+ bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
@@ -1642,6 +1644,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+ if (bio && bio_flagged(bio, BIO_NOWAIT))
+ bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea..f6835d3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -386,6 +386,9 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
+ if (dio->iocb->ki_flags & IOCB_NOWAIT)
+ bio_set_flag(bio, BIO_NOWAIT);
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
@@ -480,8 +483,12 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
unsigned i;
int err;
- if (bio->bi_error)
- dio->io_error = -EIO;
+ if (bio->bi_error) {
+ if (bio_flagged(bio, BIO_NOWAIT))
+ dio->io_error = -EAGAIN;
+ else
+ dio->io_error = -EIO;
+ }
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
err = bio->bi_error;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8e52119..1a92707 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -425,6 +425,12 @@ static inline void bio_io_error(struct bio *bio)
bio_endio(bio);
}
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+ bio->bi_error = -EAGAIN;
+ bio_endio(bio);
+}
+
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d703acb..514c08e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -102,6 +102,7 @@ struct bio {
#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */
#define BIO_THROTTLED 9 /* This bio has already been subjected to
* throttling rules. Don't do it again. */
+#define BIO_NOWAIT 10 /* don't block over blk device congestion */
/*
* Flags starting here get preserved by bio_reset() - this includes
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 6/8] nowait aio: ext4
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
` (4 preceding siblings ...)
2017-03-15 21:51 ` [PATCH 5/8] nowait aio: return on congested block device Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 7/8] nowait aio: xfs Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 8/8] nowait aio: btrfs Goldwyn Rodrigues
7 siblings, 0 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Return EAGAIN for direct I/O if any of the following is true:
+ i_rwsem is not lockable
+ Writing beyond end of file (will trigger allocation)
+ Blocks are not allocated at the write location
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/ext4/file.c | 48 +++++++++++++++++++++++++++++++-----------------
1 file changed, 31 insertions(+), 17 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8210c1f..e223b9f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -127,27 +127,22 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
return 0;
}
-/* Is IO overwriting allocated and initialized blocks? */
-static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+/* Are IO blocks allocated */
+static bool ext4_blocks_mapped(struct inode *inode, loff_t pos, loff_t len,
+ struct ext4_map_blocks *map)
{
- struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
int err, blklen;
if (pos + len > i_size_read(inode))
return false;
- map.m_lblk = pos >> blkbits;
- map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
- blklen = map.m_len;
+ map->m_lblk = pos >> blkbits;
+ map->m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
+ blklen = map->m_len;
- err = ext4_map_blocks(NULL, inode, &map, 0);
- /*
- * 'err==len' means that all of the blocks have been preallocated,
- * regardless of whether they have been initialized or not. To exclude
- * unwritten extents, we need to check m_flags.
- */
- return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+ err = ext4_map_blocks(NULL, inode, map, 0);
+ return err == blklen;
}
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
@@ -204,6 +199,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
int o_direct = iocb->ki_flags & IOCB_DIRECT;
+ int nowait = iocb->ki_flags & IOCB_NOWAIT;
int unaligned_aio = 0;
int overwrite = 0;
ssize_t ret;
@@ -216,7 +212,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_dax_write_iter(iocb, from);
#endif
- inode_lock(inode);
+ if (o_direct && nowait) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock(inode);
+ }
+
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -235,9 +237,21 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
- if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
- overwrite = 1;
+ if (o_direct && !unaligned_aio) {
+ struct ext4_map_blocks map;
+ if (ext4_blocks_mapped(inode, iocb->ki_pos,
+ iov_iter_count(from), &map)) {
+ /* To exclude unwritten extents, we need to check
+ * m_flags.
+ */
+ if (ext4_should_dioread_nolock(inode) &&
+ (map.m_flags & EXT4_MAP_MAPPED))
+ overwrite = 1;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 7/8] nowait aio: xfs
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
` (5 preceding siblings ...)
2017-03-15 21:51 ` [PATCH 6/8] nowait aio: ext4 Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
2017-03-15 21:51 ` [PATCH 8/8] nowait aio: btrfs Goldwyn Rodrigues
7 siblings, 0 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
If IOCB_NOWAIT is set, bail if the i_rwsem is not lockable
immediately.
If IOMAP_NOWAIT is set, return EAGAIN in xfs_file_iomap_begin
if it needs allocation either due to file extension, writing to a hole,
or COW or waiting for other DIOs to finish.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/xfs/xfs_file.c | 15 +++++++++++----
fs/xfs/xfs_iomap.c | 13 +++++++++++++
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 35703a8..08a5eef 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -541,8 +541,11 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- xfs_ilock(ip, iolock);
-
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -553,9 +556,13 @@ xfs_file_dio_aio_write(
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
- if (unaligned_io)
+ if (unaligned_io) {
+ /* If we are going to wait for other DIO to finish, bail */
+ if ((iocb->ki_flags & IOCB_NOWAIT) &&
+ atomic_read(&inode->i_dio_count))
+ return -EAGAIN;
inode_dio_wait(inode);
- else if (iolock == XFS_IOLOCK_EXCL) {
+ } else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 288ee5b..6843725 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1015,6 +1015,11 @@ xfs_file_iomap_begin(
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
if (flags & IOMAP_DIRECT) {
+ /* A reflinked inode will result in CoW alloc */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
/* may drop and re-acquire the ilock */
error = xfs_reflink_allocate_cow(ip, &imap, &shared,
&lockmode);
@@ -1032,6 +1037,14 @@ xfs_file_iomap_begin(
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
/*
+ * If nowait is set bail since we are going to make
+ * allocations.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 8/8] nowait aio: btrfs
2017-03-15 21:50 [PATCH 0/8 v3] No wait AIO Goldwyn Rodrigues
` (6 preceding siblings ...)
2017-03-15 21:51 ` [PATCH 7/8] nowait aio: xfs Goldwyn Rodrigues
@ 2017-03-15 21:51 ` Goldwyn Rodrigues
7 siblings, 0 replies; 21+ messages in thread
From: Goldwyn Rodrigues @ 2017-03-15 21:51 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Return EAGAIN if any of the following is true:
+ i_rwsem is not lockable
+ NODATACOW or PREALLOC is not set
+ Cannot nocow at the desired location
+ Writing beyond end of file which is not allocated
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/btrfs/file.c | 25 ++++++++++++++++++++-----
fs/btrfs/inode.c | 3 +++
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 520cb72..a870e5d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1823,12 +1823,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
ssize_t err;
- loff_t pos;
- size_t count;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(from);
loff_t oldsize;
int clean_page = 0;
- inode_lock(inode);
+ if ((iocb->ki_flags & IOCB_NOWAIT) &&
+ (iocb->ki_flags & IOCB_DIRECT)) {
+ /* Don't sleep on inode rwsem */
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ /*
+ * We will allocate space in case nodatacow is not set,
+ * so bail
+ */
+ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) ||
+ check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+ } else
+ inode_lock(inode);
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
inode_unlock(inode);
@@ -1862,8 +1879,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/
update_time_for_write(inode);
- pos = iocb->ki_pos;
- count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c40060c..788bb93 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8613,6 +8613,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
--
2.10.2
^ permalink raw reply related [flat|nested] 21+ messages in thread