From: Christoph Hellwig <hch@infradead.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-aio@kvack.org,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 4/5] aio: support for IO polling
Date: Mon, 19 Nov 2018 05:32:54 -0800
Message-ID: <20181119133254.GA32705@infradead.org>
In-Reply-To: <20181119081119.GJ9622@infradead.org>
I just saw the patch that avoids the irq disabling show up in your
tree this morning.  I think we can do even better by using slightly
lazy lists that are not updated from ->ki_complete context.

Please take a look at the patch below - it replaces patch 3 from my
previous mail, that is, it applies on top of what you sent to the
list plus my first two patches.

Completely untested again, of course..
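
To make the idea concrete before diving into the diff, here is the
core of the scheme distilled (a simplified sketch of the code below,
not a compilable unit on its own):

	/* submission: plain spinlock, never taken from irq context */
	spin_lock(&ctx->poll_lock);
	list_add_tail(&kiocb->ki_poll_list, &ctx->poll_submitted);
	spin_unlock(&ctx->poll_lock);

	/* ->ki_complete, possibly in irq context: no lists, no locks */
	set_bit(IOCB_POLL_COMPLETED, &iocb->ki_flags);
	atomic_inc(&iocb->ki_ctx->poll_completed);

	/*
	 * The single poller (serialized by CTX_TYPE_POLLING) grabs a
	 * batch under the lock, then walks it without any locking:
	 */
	spin_lock(&ctx->poll_lock);
	list_splice_init(&ctx->poll_submitted, &ctx->poll_completing);
	spin_unlock(&ctx->poll_lock);

	list_for_each_entry_safe(iocb, n, &ctx->poll_completing,
			ki_poll_list) {
		if (!test_bit(IOCB_POLL_COMPLETED, &iocb->ki_flags))
			continue;
		list_del(&iocb->ki_poll_list);
		/* copy the io_event to userspace, drop our reference */
	}
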
---
From cf9fd90d13a025d53b26ba54202c2898ba4bf0ef Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 18 Nov 2018 17:17:55 +0100
Subject: change aio poll list management
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Have a submitted list, which the iocb is added to on submission and
batch-removed from in __aio_check_polled.  The actual I/O completion
only ever marks the iocb as completed using a bit flag.  The event
reaping code then walks the list completely lock free after a quick
splice under the poll_lock (which no longer needs to disable irqs),
because we prevent multiple contexts from polling at the same time.
Also move the actual blk_poll call into a filesystem method, which
keeps the aio code better abstracted, allows checking whether a given
file actually supports polling, and last but not least adds support
for filesystems with multiple block devices.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
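
For reference, a rough sketch of the userspace side this is aimed at
(untested; the IOCB_FLAG_HIPRI flag and its value are assumed from
earlier in this series, and the fd must be opened with O_DIRECT):

#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#ifndef IOCB_FLAG_HIPRI
#define IOCB_FLAG_HIPRI	(1 << 2)	/* value assumed, from this series */
#endif

static long io_setup(unsigned nr, aio_context_t *ctxp)
{
	return syscall(__NR_io_setup, nr, ctxp);
}

static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	return syscall(__NR_io_submit, ctx, nr, iocbpp);
}

static long io_getevents(aio_context_t ctx, long min_nr, long nr,
		struct io_event *events, struct timespec *timeout)
{
	return syscall(__NR_io_getevents, ctx, min_nr, nr, events, timeout);
}

static int polled_read(int fd)
{
	aio_context_t ctx = 0;
	struct iocb cb, *cbs[1] = { &cb };
	struct io_event ev;
	void *buf;

	if (posix_memalign(&buf, 4096, 4096) || io_setup(32, &ctx) < 0)
		return -1;

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_lio_opcode = IOCB_CMD_PREAD;
	cb.aio_buf = (unsigned long)buf;
	cb.aio_nbytes = 4096;
	cb.aio_flags = IOCB_FLAG_HIPRI;	/* ask for polled completion */

	if (io_submit(ctx, 1, cbs) != 1)
		return -1;

	/* spins in __aio_check_polled until the event is reaped */
	return io_getevents(ctx, 1, 1, &ev, NULL) == 1 ? 0 : -1;
}
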
fs/aio.c | 328 +++++++++++++++++-------------------------
fs/block_dev.c | 20 ++-
fs/direct-io.c | 4 +-
fs/iomap.c | 53 ++++---
fs/xfs/xfs_file.c | 1 +
include/linux/fs.h | 2 +-
include/linux/iomap.h | 1 +
7 files changed, 186 insertions(+), 223 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index d9198f99ed97..8fa106db9b64 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -89,6 +89,9 @@ struct ctx_rq_wait {
enum {
CTX_TYPE_NORMAL = 0,
CTX_TYPE_POLLED,
+
+ /* currently undergoing a polling io_getevents */
+ CTX_TYPE_POLLING,
};
struct kioctx {
@@ -151,8 +154,7 @@ struct kioctx {
struct {
spinlock_t poll_lock;
- struct list_head poll_pending;
- struct list_head poll_done;
+ struct list_head poll_submitted;
} ____cacheline_aligned_in_smp;
struct {
@@ -175,6 +177,9 @@ struct kioctx {
struct file *aio_ring_file;
unsigned id;
+
+ struct list_head poll_completing;
+ atomic_t poll_completed;
};
struct fsync_iocb {
@@ -209,21 +214,27 @@ struct aio_kiocb {
struct list_head ki_list; /* the aio core uses this
* for cancellation */
+ unsigned long ki_flags;
+#define IOCB_POLL_COMPLETED 0
struct list_head ki_poll_list;
refcount_t ki_refcnt;
- /*
- * If the aio_resfd field of the userspace iocb is not zero,
- * this is the underlying eventfd context to deliver events to.
- */
- struct eventfd_ctx *ki_eventfd;
+ union {
+ /*
+ * If the aio_resfd field of the userspace iocb is not zero,
+ * this is the underlying eventfd context to deliver events to.
+ */
+ struct eventfd_ctx *ki_eventfd;
- /*
- * For polled IO, stash completion info here
- */
- long ki_poll_res;
- long ki_poll_res2;
+ /*
+ * For polled IO, stash completion info here
+ */
+ struct {
+ long res;
+ long res2;
+ } ki_iopoll;
+ };
};
/*------ sysctl variables----*/
@@ -761,8 +772,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
INIT_LIST_HEAD(&ctx->active_reqs);
spin_lock_init(&ctx->poll_lock);
- INIT_LIST_HEAD(&ctx->poll_pending);
- INIT_LIST_HEAD(&ctx->poll_done);
+ INIT_LIST_HEAD(&ctx->poll_submitted);
+ INIT_LIST_HEAD(&ctx->poll_completing);
+ atomic_set(&ctx->poll_completed, 0);
if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
goto err;
@@ -1282,38 +1294,6 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
return ret < 0 || *i >= min_nr;
}
-struct aio_iopoll_data {
- unsigned int blk_qc;
- struct block_device *bdev;
-};
-
-static int aio_io_poll(struct aio_iopoll_data *pd, bool wait)
-{
-#ifdef CONFIG_BLOCK
- /*
- * Should only happen if someone sets ->ki_blk_qc at random,
- * not being a blockdev target. We'll just ignore it, the IO
- * will complete normally without being polled.
- */
- if (pd->bdev)
- return blk_poll(bdev_get_queue(pd->bdev), pd->blk_qc, wait);
-#endif
-
- return 0;
-}
-
-static struct block_device *aio_bdev_host(struct kiocb *req)
-{
- struct inode *inode = req->ki_filp->f_mapping->host;
-
- if (S_ISBLK(inode->i_mode))
- return I_BDEV(inode);
- else if (inode->i_sb && inode->i_sb->s_bdev)
- return inode->i_sb->s_bdev;
-
- return NULL;
-}
-
#define AIO_POLL_STACK 8
/*
@@ -1322,157 +1302,119 @@ static struct block_device *aio_bdev_host(struct kiocb *req)
* the caller should free them.
*/
static long aio_poll_reap(struct kioctx *ctx, struct io_event __user *evs,
- int off, long max, void **iocbs, int *to_free)
- __releases(&ctx->poll_lock)
- __acquires(&ctx->poll_lock)
+ unsigned int *nr_events, long max)
{
- struct aio_kiocb *iocb;
- int ret, nr = 0;
+ void *iocbs[AIO_POLL_STACK];
+ struct aio_kiocb *iocb, *n;
+ int to_free = 0, ret = 0;
- while ((iocb = list_first_entry_or_null(&ctx->poll_done,
- struct aio_kiocb, ki_poll_list))) {
- struct io_event __user *uev;
+ list_for_each_entry_safe(iocb, n, &ctx->poll_completing, ki_poll_list) {
struct io_event ev;
- if (*to_free == AIO_POLL_STACK) {
- iocb_put_many(ctx, iocbs, *to_free);
- *to_free = 0;
+ if (!test_bit(IOCB_POLL_COMPLETED, &iocb->ki_flags))
+ continue;
+
+ if (to_free == AIO_POLL_STACK) {
+ iocb_put_many(ctx, iocbs, to_free);
+ to_free = 0;
}
list_del(&iocb->ki_poll_list);
- iocbs[*to_free++] = iocb;
+ iocbs[to_free++] = iocb;
if (!evs) {
- nr++;
+ (*nr_events)++;
continue;
}
ev.obj = (u64)(unsigned long)iocb->ki_user_iocb;
ev.data = iocb->ki_user_data;
- ev.res = iocb->ki_poll_res;
- ev.res2 = iocb->ki_poll_res2;
-
- uev = evs + nr + off;
- if (unlikely(__copy_to_user_inatomic(uev, &ev, sizeof(*uev)))) {
- /*
- * Unexpected slow path, drop lock and attempt copy
- * again. If this also fails we are done.
- */
- spin_unlock_irq(&ctx->poll_lock);
- ret = copy_to_user(uev, &ev, sizeof(*uev));
- spin_lock_irq(&ctx->poll_lock);
- if (ret)
- return nr ? nr : -EFAULT;
+ ev.res = iocb->ki_iopoll.res;
+ ev.res2 = iocb->ki_iopoll.res2;
+ if (copy_to_user(evs + *nr_events, &ev, sizeof(ev))) {
+ ret = -EFAULT;
+ break;
}
- if (++nr + off == max)
+ if (++(*nr_events) == max)
break;
}
- return nr;
-}
-
-static void aio_poll_for_events(struct kioctx *ctx, struct aio_iopoll_data *pd,
- unsigned int nr_pd, int off, long min, long max)
-{
- int i, polled = 0;
-
- /*
- * Poll for needed events with wait == true, anything
- * after that we just check if we have more, up to max.
- */
- for (i = 0; i < nr_pd; i++) {
- bool wait = polled + off >= min;
-
- polled += aio_io_poll(&pd[i], wait);
- if (polled + off >= max)
- break;
-
- /*
- * If we have entries waiting to be reaped, stop polling
- */
- if (!list_empty_careful(&ctx->poll_done))
- break;
- }
+ if (to_free)
+ iocb_put_many(ctx, iocbs, to_free);
+ return ret;
}
static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event,
- int off, unsigned int *entries, long min, long max)
+ unsigned int *nr_events, long min, long max)
{
- struct aio_iopoll_data pd[AIO_POLL_STACK];
- void *iocbs[AIO_POLL_STACK];
- int to_free = 0;
struct aio_kiocb *iocb;
- unsigned int nr_pd;
- int ret, found = 0;
-
- if (list_empty_careful(&ctx->poll_pending))
- goto out;
+ unsigned int poll_completed;
+ int to_poll = 0, polled = 0, ret;
/*
* Check if we already have done events that satisfy what we need
*/
- spin_lock_irq(&ctx->poll_lock);
- while ((ret = aio_poll_reap(ctx, event, off, max, iocbs, &to_free))) {
- if (ret < 0 || ret + off >= min) {
- spin_unlock_irq(&ctx->poll_lock);
- if (to_free)
- iocb_put_many(ctx, iocbs, to_free);
+ if (!list_empty(&ctx->poll_completing)) {
+ ret = aio_poll_reap(ctx, event, nr_events, max);
+ if (ret < 0)
return ret;
- }
+ if (*nr_events >= min)
+ return 0;
+ }
- if (to_free) {
- iocb_put_many(ctx, iocbs, to_free);
- to_free = 0;
- }
- found += ret;
- off += ret;
+ /*
+ * Take in a new working set from the submitted list if possible.
+ */
+ if (!list_empty_careful(&ctx->poll_submitted)) {
+ spin_lock(&ctx->poll_lock);
+ list_splice_init(&ctx->poll_submitted, &ctx->poll_completing);
+ spin_unlock(&ctx->poll_lock);
}
+ if (list_empty(&ctx->poll_completing))
+ return 0;
+
+ /*
+ * Check again now that we have a new batch.
+ */
+ ret = aio_poll_reap(ctx, event, nr_events, max);
+ if (ret < 0)
+ return ret;
+ if (*nr_events >= min)
+ return 0;
+
/*
* Find up to 'max_nr' worth of events to poll for, including the
* events we already successfully polled
*/
- nr_pd = 0;
- list_for_each_entry(iocb, &ctx->poll_pending, ki_poll_list) {
- struct kiocb *kiocb = &iocb->rw;
- blk_qc_t qc;
-
+ poll_completed = atomic_read(&ctx->poll_completed);
+ list_for_each_entry(iocb, &ctx->poll_completing, ki_poll_list) {
/*
- * Not submitted yet, don't poll for it
+ * Poll for needed events with wait == true, anything after
+ * that we just check if we have more, up to max.
*/
- qc = READ_ONCE(kiocb->ki_blk_qc);
- if (qc == BLK_QC_T_NONE)
- continue;
+ bool wait = polled + *nr_events >= min;
- pd[nr_pd].blk_qc = qc;
- pd[nr_pd].bdev = aio_bdev_host(kiocb);
-
- ++nr_pd;
- if (nr_pd == ARRAY_SIZE(pd) || nr_pd + off >= max)
+ if (test_bit(IOCB_POLL_COMPLETED, &iocb->ki_flags))
break;
- }
- spin_unlock_irq(&ctx->poll_lock);
- if (nr_pd) {
- *entries = nr_pd;
- aio_poll_for_events(ctx, pd, nr_pd, off, min, max);
- }
+ if (++to_poll + *nr_events >= max)
+ break;
-out:
- if (!list_empty_careful(&ctx->poll_done)) {
- spin_lock_irq(&ctx->poll_lock);
- ret = aio_poll_reap(ctx, event, off, max, iocbs, &to_free);
- spin_unlock_irq(&ctx->poll_lock);
-
- if (to_free)
- iocb_put_many(ctx, iocbs, to_free);
- if (ret < 0)
- return ret;
- found += ret;
+ polled += iocb->rw.ki_filp->f_op->iopoll(&iocb->rw, wait);
+ if (polled + *nr_events >= max)
+ break;
+ if (poll_completed != atomic_read(&ctx->poll_completed))
+ break;
}
- return found;
+ ret = aio_poll_reap(ctx, event, nr_events, max);
+ if (ret < 0)
+ return ret;
+ if (*nr_events >= min)
+ return 0;
+ return to_poll;
}
/*
@@ -1481,48 +1423,41 @@ static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event,
*/
static void aio_reap_polled_events(struct kioctx *ctx)
{
- unsigned int loop, found;
-
if (!test_bit(CTX_TYPE_POLLED, &ctx->io_type))
return;
- spin_lock_irq(&ctx->poll_lock);
- while (!list_empty(&ctx->poll_pending) || !list_empty(&ctx->poll_done)) {
- loop = 0;
- spin_unlock_irq(&ctx->poll_lock);
- found = __aio_check_polled(ctx, NULL, 0, &loop, 1, UINT_MAX);
- spin_lock_irq(&ctx->poll_lock);
+ while (!list_empty_careful(&ctx->poll_submitted) ||
+ !list_empty(&ctx->poll_completing)) {
+ unsigned int nr_events = 0;
+
+ __aio_check_polled(ctx, NULL, &nr_events, 1, UINT_MAX);
}
- spin_unlock_irq(&ctx->poll_lock);
}
static int aio_check_polled(struct kioctx *ctx, long min_nr, long nr,
struct io_event __user *event)
{
- unsigned int found;
- int this, ret = 0;
+ unsigned int nr_events = 0;
+ int ret = 0;
- if (!access_ok(VERIFY_WRITE, event, nr * sizeof(*event)))
- return -EFAULT;
+ /* We can only allow a single thread to poll a context at a time */
+ if (test_and_set_bit(CTX_TYPE_POLLING, &ctx->io_type))
+ return -EBUSY;
- do {
- int tmin;
+ while (!nr_events || !need_resched()) {
+ int tmin = 0;
- if (ret && need_resched())
- break;
+ if (nr_events < min_nr)
+ tmin = min_nr - nr_events;
- found = 0;
- tmin = ret >= min_nr ? 0 : min_nr - ret;
- this = __aio_check_polled(ctx, event, ret, &found, tmin, nr);
- if (this < 0) {
- if (!ret)
- ret = this;
+ ret = __aio_check_polled(ctx, event, &nr_events, tmin, nr);
+ if (ret <= 0)
break;
- }
- ret += this;
- } while (found && ret < min_nr);
+ ret = 0;
+ }
- return ret;
+ clear_bit(CTX_TYPE_POLLING, &ctx->io_type);
+ return nr_events ? nr_events : ret;
}
static long read_events(struct kioctx *ctx, long min_nr, long nr,
@@ -1707,19 +1642,15 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
static void aio_complete_rw_poll(struct kiocb *kiocb, long res, long res2)
{
struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
- struct kioctx *ctx = iocb->ki_ctx;
struct file *filp = kiocb->ki_filp;
- unsigned long flags;
kiocb_end_write(kiocb);
- iocb->ki_poll_res = res;
- iocb->ki_poll_res2 = res2;
-
- spin_lock_irqsave(&ctx->poll_lock, flags);
- list_move_tail(&iocb->ki_poll_list, &ctx->poll_done);
- spin_unlock_irqrestore(&ctx->poll_lock, flags);
+ iocb->ki_iopoll.res = res;
+ iocb->ki_iopoll.res2 = res2;
+ set_bit(IOCB_POLL_COMPLETED, &iocb->ki_flags);
+ atomic_inc(&iocb->ki_ctx->poll_completed);
fput(filp);
}
@@ -1737,14 +1668,19 @@ static int aio_prep_rw(struct aio_kiocb *kiocb, struct iocb *iocb)
if (iocb->aio_flags & IOCB_FLAG_HIPRI) {
struct kioctx *ctx = kiocb->ki_ctx;
+ ret = -EOPNOTSUPP;
+ if (!(req->ki_flags & IOCB_DIRECT) ||
+ !req->ki_filp->f_op->iopoll)
+ goto out_fput;
+
req->ki_flags |= IOCB_HIPRI;
- req->ki_blk_qc = BLK_QC_T_NONE;
req->ki_complete = aio_complete_rw_poll;
- spin_lock_irq(&ctx->poll_lock);
- list_add_tail(&kiocb->ki_poll_list, &ctx->poll_pending);
- spin_unlock_irq(&ctx->poll_lock);
+ spin_lock(&ctx->poll_lock);
+ list_add_tail(&kiocb->ki_poll_list, &ctx->poll_submitted);
+ spin_unlock(&ctx->poll_lock);
} else {
+ req->ki_flags &= ~IOCB_HIPRI;
req->ki_complete = aio_complete_rw;
}
@@ -1761,8 +1697,7 @@ static int aio_prep_rw(struct aio_kiocb *kiocb, struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret);
- fput(req->ki_filp);
- return ret;
+ goto out_fput;
}
req->ki_ioprio = iocb->aio_reqprio;
@@ -1771,7 +1706,10 @@ static int aio_prep_rw(struct aio_kiocb *kiocb, struct iocb *iocb)
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
- fput(req->ki_filp);
+ goto out_fput;
+ return 0;
+out_fput:
+ fput(req->ki_filp);
return ret;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8a2fed18e3fc..8ba58e280ac6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -236,7 +236,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio.bi_opf |= REQ_HIPRI;
qc = submit_bio(&bio);
- WRITE_ONCE(iocb->ki_blk_qc, qc);
for (;;) {
__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -274,6 +273,7 @@ struct blkdev_dio {
};
size_t size;
atomic_t ref;
+ blk_qc_t qc;
bool multi_bio : 1;
bool should_dirty : 1;
bool is_sync : 1;
@@ -282,6 +282,14 @@ struct blkdev_dio {
static struct bio_set blkdev_dio_pool;
+static bool blkdev_iopoll(struct kiocb *kiocb, bool wait)
+{
+ struct blkdev_dio *dio = kiocb->private;
+ struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
+
+ return blk_poll(bdev_get_queue(bdev), READ_ONCE(dio->qc), wait);
+}
+
static void blkdev_bio_end_io(struct bio *bio)
{
struct blkdev_dio *dio = bio->bi_private;
@@ -336,7 +344,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
- blk_qc_t qc = BLK_QC_T_NONE;
int ret = 0;
if ((pos | iov_iter_alignment(iter)) &
@@ -356,6 +363,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
dio->size = 0;
dio->multi_bio = false;
dio->should_dirty = is_read && iter_is_iovec(iter);
+ dio->qc = BLK_QC_T_NONE;
+
+ iocb->private = dio;
/*
* Don't plug for HIPRI/polled IO, as those should go straight
@@ -396,8 +406,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
if (iocb->ki_flags & IOCB_HIPRI)
bio->bi_opf |= REQ_HIPRI;
- qc = submit_bio(bio);
- WRITE_ONCE(iocb->ki_blk_qc, qc);
+ WRITE_ONCE(dio->qc, submit_bio(bio));
break;
}
@@ -425,7 +434,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc, true))
+ !blk_poll(bdev_get_queue(bdev), dio->qc, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -2063,6 +2072,7 @@ const struct file_operations def_blk_fops = {
.llseek = block_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
+ .iopoll = blkdev_iopoll,
.mmap = generic_file_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 34de494e9061..a5a4e5a1423e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -477,10 +477,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
if (sdio->submit_io) {
sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
dio->bio_cookie = BLK_QC_T_NONE;
- } else {
+ } else
dio->bio_cookie = submit_bio(bio);
- WRITE_ONCE(dio->iocb->ki_blk_qc, dio->bio_cookie);
- }
sdio->bio = NULL;
sdio->boundary = 0;
diff --git a/fs/iomap.c b/fs/iomap.c
index 4cf412b6230a..e5cd9dbe78a8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1419,14 +1419,14 @@ struct iomap_dio {
unsigned flags;
int error;
bool wait_for_completion;
+ blk_qc_t cookie;
+ struct request_queue *last_queue;
union {
/* used during submission and for synchronous completion: */
struct {
struct iov_iter *iter;
struct task_struct *waiter;
- struct request_queue *last_queue;
- blk_qc_t cookie;
} submit;
/* used for aio completion: */
@@ -1436,6 +1436,30 @@ struct iomap_dio {
};
};
+bool iomap_dio_iopoll(struct kiocb *kiocb, bool wait)
+{
+ struct iomap_dio *dio = kiocb->private;
+ struct request_queue *q = READ_ONCE(dio->last_queue);
+
+ if (!q)
+ return false;
+ return blk_poll(q, READ_ONCE(dio->cookie), wait);
+}
+EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
+
+static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
+ struct bio *bio)
+{
+ atomic_inc(&dio->ref);
+
+ /*
+ * iomap_dio_iopoll can race with us. A non-zero last_queue marks that
+ * we are ready to poll.
+ */
+ WRITE_ONCE(dio->cookie, submit_bio(bio));
+ WRITE_ONCE(dio->last_queue, bdev_get_queue(iomap->bdev));
+}
+
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
struct kiocb *iocb = dio->iocb;
@@ -1548,14 +1572,13 @@ static void iomap_dio_bio_end_io(struct bio *bio)
}
}
-static blk_qc_t
+static void
iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
unsigned len)
{
struct page *page = ZERO_PAGE(0);
int flags = REQ_SYNC | REQ_IDLE;
struct bio *bio;
- blk_qc_t qc;
bio = bio_alloc(GFP_KERNEL, 1);
bio_set_dev(bio, iomap->bdev);
@@ -1569,11 +1592,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
get_page(page);
__bio_add_page(bio, page, len, 0);
bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
-
- atomic_inc(&dio->ref);
- qc = submit_bio(bio);
- WRITE_ONCE(dio->iocb->ki_blk_qc, qc);
- return qc;
+ iomap_dio_submit_bio(dio, iomap, bio);
}
static loff_t
@@ -1679,11 +1698,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
copied += n;
nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
-
- atomic_inc(&dio->ref);
-
- dio->submit.last_queue = bdev_get_queue(iomap->bdev);
- dio->iocb->ki_blk_qc = dio->submit.cookie = submit_bio(bio);
+ iomap_dio_submit_bio(dio, iomap, bio);
} while (nr_pages);
if (need_zeroout) {
@@ -1785,6 +1800,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
if (!dio)
return -ENOMEM;
+ iocb->private = dio;
dio->iocb = iocb;
atomic_set(&dio->ref, 1);
@@ -1794,11 +1810,11 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->error = 0;
dio->flags = 0;
dio->wait_for_completion = is_sync_kiocb(iocb);
+ dio->cookie = BLK_QC_T_NONE;
+ dio->last_queue = NULL;
dio->submit.iter = iter;
dio->submit.waiter = current;
- dio->submit.cookie = BLK_QC_T_NONE;
- dio->submit.last_queue = NULL;
if (iov_iter_rw(iter) == READ) {
if (pos >= dio->i_size)
@@ -1897,9 +1913,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !dio->submit.last_queue ||
- !blk_poll(dio->submit.last_queue,
- dio->submit.cookie, true))
+ !dio->last_queue ||
+ !blk_poll(dio->last_queue, dio->cookie, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 53c9ab8fb777..603e705781a4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1203,6 +1203,7 @@ const struct file_operations xfs_file_operations = {
.write_iter = xfs_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
+ .iopoll = iomap_dio_iopoll,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 032761d9b218..1d46a10aef6c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -310,7 +310,6 @@ struct kiocb {
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
- u32 ki_blk_qc;
} __randomize_layout;
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -1782,6 +1781,7 @@ struct file_operations {
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ bool (*iopoll)(struct kiocb *kiocb, bool wait);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 9a4258154b25..2cbe87ad1878 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -162,6 +162,7 @@ typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
unsigned flags);
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+bool iomap_dio_iopoll(struct kiocb *kiocb, bool wait);
#ifdef CONFIG_SWAP
struct file;
--
2.19.1