linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] aio: io_cancel() no longer returns the io_event
@ 2013-02-28 20:35 Kent Overstreet
  2013-02-28 20:35 ` [PATCH 2/4] aio: Allow cancellation without a cancel callback Kent Overstreet
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Kent Overstreet @ 2013-02-28 20:35 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-aio
  Cc: Kent Overstreet, tytso, axboe, zab, bcrl, anatol

Originally, io_cancel() was documented to return the io_event if
cancellation succeeded - the io_event wouldn't be delivered via the ring
buffer like it normally would.

But this isn't what the implementation was actually doing; the only
driver implementing cancellation, the usb gadget code, never returned an
io_event in its cancel function. And aio_complete() was recently changed
to no longer suppress event delivery if the kiocb had been cancelled.

This gets rid of the unused io_event argument to kiocb_cancel() and
kiocb->ki_cancel(), and changes io_cancel() to return -EINPROGRESS if
kiocb->ki_cancel() returned success.

Also tweak the refcounting in kiocb_cancel() to make more sense.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/usb/gadget/inode.c |  3 +--
 fs/aio.c                   | 24 ++++++++++--------------
 include/linux/aio.h        |  2 +-
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index b540a1d..fd1bf4a 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -524,7 +524,7 @@ struct kiocb_priv {
 	unsigned		actual;
 };
 
-static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
+static void ep_aio_cancel(struct kiocb *iocb)
 {
 	struct kiocb_priv	*priv = iocb->private;
 	struct ep_data		*epdata;
@@ -540,7 +540,6 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
 	// spin_unlock(&epdata->dev->lock);
 	local_irq_enable();
 
-	aio_put_req(iocb);
 	return value;
 }
 
diff --git a/fs/aio.c b/fs/aio.c
index a86ffd0..4b9bfb5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -256,8 +256,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
-			struct io_event *res)
+static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 	int ret = -EINVAL;
@@ -279,12 +278,10 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
 	atomic_inc(&kiocb->ki_users);
 	spin_unlock_irq(&ctx->ctx_lock);
 
-	memset(res, 0, sizeof(*res));
-	res->obj = (u64)(unsigned long)kiocb->ki_obj.user;
-	res->data = kiocb->ki_user_data;
-	ret = cancel(kiocb, res);
+	ret = cancel(kiocb);
 
 	spin_lock_irq(&ctx->ctx_lock);
+	aio_put_req(kiocb);
 
 	return ret;
 }
@@ -305,7 +302,6 @@ static void free_ioctx_rcu(struct rcu_head *head)
 static void free_ioctx(struct kioctx *ctx)
 {
 	struct aio_ring *ring;
-	struct io_event res;
 	struct kiocb *req;
 	unsigned cpu, head, avail;
 
@@ -316,7 +312,7 @@ static void free_ioctx(struct kioctx *ctx)
 				       struct kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
-		kiocb_cancel(ctx, req, &res);
+		kiocb_cancel(ctx, req);
 	}
 
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -1351,7 +1347,6 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
 SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		struct io_event __user *, result)
 {
-	struct io_event res;
 	struct kioctx *ctx;
 	struct kiocb *kiocb;
 	u32 key;
@@ -1369,18 +1364,19 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 
 	kiocb = lookup_kiocb(ctx, iocb, key);
 	if (kiocb)
-		ret = kiocb_cancel(ctx, kiocb, &res);
+		ret = kiocb_cancel(ctx, kiocb);
 	else
 		ret = -EINVAL;
 
 	spin_unlock_irq(&ctx->ctx_lock);
 
 	if (!ret) {
-		/* Cancellation succeeded -- copy the result
-		 * into the user's buffer.
+		/*
+		 * The result argument is no longer used - the io_event is
+		 * always delivered via the ring buffer. -EINPROGRESS indicates
+		 * cancellation is progress:
 		 */
-		if (copy_to_user(result, &res, sizeof(res)))
-			ret = -EFAULT;
+		ret = -EINPROGRESS;
 	}
 
 	put_ioctx(ctx);
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 93892bf..7340f77 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -28,7 +28,7 @@ struct batch_complete;
  */
 #define KIOCB_CANCELLED		((void *) (~0ULL))
 
-typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *);
+typedef int (kiocb_cancel_fn)(struct kiocb *);
 
 struct kiocb {
 	struct rb_node		ki_node;
-- 
1.7.12

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/4] aio: Allow cancellation without a cancel callback
  2013-02-28 20:35 [PATCH 1/4] aio: io_cancel() no longer returns the io_event Kent Overstreet
@ 2013-02-28 20:35 ` Kent Overstreet
  2013-02-28 21:53   ` Benjamin LaHaise
  2013-02-28 20:35 ` [PATCH 3/4] direct-io: Set dio->io_error directly Kent Overstreet
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 6+ messages in thread
From: Kent Overstreet @ 2013-02-28 20:35 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-aio
  Cc: Kent Overstreet, tytso, axboe, zab, bcrl, anatol

Prep work for bio cancellation. At least initially, we don't want to
implement a callback that has to chase down all the state (multiple
bios/requests) associated with the iocb; a simple flag will suffice.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/usb/gadget/inode.c |  7 +----
 fs/aio.c                   | 76 +++++++++++++++++++++-------------------------
 include/linux/aio.h        |  5 +++
 3 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index fd1bf4a..e5e2210 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -528,19 +528,14 @@ static void ep_aio_cancel(struct kiocb *iocb)
 {
 	struct kiocb_priv	*priv = iocb->private;
 	struct ep_data		*epdata;
-	int			value;
 
 	local_irq_disable();
 	epdata = priv->epdata;
 	// spin_lock(&epdata->dev->lock);
 	if (likely(epdata && epdata->ep && priv->req))
-		value = usb_ep_dequeue (epdata->ep, priv->req);
-	else
-		value = -EINVAL;
+		usb_ep_dequeue (epdata->ep, priv->req);
 	// spin_unlock(&epdata->dev->lock);
 	local_irq_enable();
-
-	return value;
 }
 
 static ssize_t ep_copy_to_user(struct kiocb_priv *priv)
diff --git a/fs/aio.c b/fs/aio.c
index 4b9bfb5..f5f27bd 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -242,48 +242,34 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 {
-	struct kioctx *ctx = req->ki_ctx;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ctx->ctx_lock, flags);
+	kiocb_cancel_fn *p, *old = req->ki_cancel;
 
-	if (!req->ki_list.next)
-		list_add(&req->ki_list, &ctx->active_reqs);
-
-	req->ki_cancel = cancel;
+	do {
+		if (old == KIOCB_CANCELLED) {
+			cancel(req);
+			return;
+		}
 
-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+		p = old;
+		old = cmpxchg(&req->ki_cancel, old, cancel);
+	} while (old != p);
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static void kiocb_cancel(struct kioctx *ctx, struct kiocb *req)
 {
-	kiocb_cancel_fn *old, *cancel;
-	int ret = -EINVAL;
-
-	/*
-	 * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
-	 * actually has a cancel function, hence the cmpxchg()
-	 */
-
-	cancel = ACCESS_ONCE(kiocb->ki_cancel);
-	do {
-		if (!cancel || cancel == KIOCB_CANCELLED)
-			return ret;
-
-		old = cancel;
-		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
-	} while (cancel != old);
-
-	atomic_inc(&kiocb->ki_users);
-	spin_unlock_irq(&ctx->ctx_lock);
+	kiocb_cancel_fn *cancel;
 
-	ret = cancel(kiocb);
+	cancel = xchg(&req->ki_cancel, KIOCB_CANCELLED);
+	if (cancel && cancel != KIOCB_CANCELLED) {
+		atomic_inc(&req->ki_users);
+		spin_unlock_irq(&ctx->ctx_lock);
 
-	spin_lock_irq(&ctx->ctx_lock);
-	aio_put_req(kiocb);
+		cancel(req);
 
-	return ret;
+		spin_lock_irq(&ctx->ctx_lock);
+		aio_put_req(req);
+	}
 }
 
 static void free_ioctx_rcu(struct rcu_head *head)
@@ -1126,6 +1112,10 @@ rw_common:
 
 		req->ki_nbytes = ret;
 
+		spin_lock_irq(&req->ki_ctx->ctx_lock);
+		list_add(&req->ki_list, &req->ki_ctx->active_reqs);
+		spin_unlock_irq(&req->ki_ctx->ctx_lock);
+
 		/* XXX: move/kill - rw_verify_area()? */
 		/* This matches the pread()/pwrite() logic */
 		if (req->ki_pos < 0) {
@@ -1141,6 +1131,10 @@ rw_common:
 		if (!file->f_op->aio_fsync)
 			return -EINVAL;
 
+		spin_lock_irq(&req->ki_ctx->ctx_lock);
+		list_add(&req->ki_list, &req->ki_ctx->active_reqs);
+		spin_unlock_irq(&req->ki_ctx->ctx_lock);
+
 		ret = file->f_op->aio_fsync(req, 1);
 		break;
 
@@ -1148,6 +1142,10 @@ rw_common:
 		if (!file->f_op->aio_fsync)
 			return -EINVAL;
 
+		spin_lock_irq(&req->ki_ctx->ctx_lock);
+		list_add(&req->ki_list, &req->ki_ctx->active_reqs);
+		spin_unlock_irq(&req->ki_ctx->ctx_lock);
+
 		ret = file->f_op->aio_fsync(req, 0);
 		break;
 
@@ -1363,14 +1361,8 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 	spin_lock_irq(&ctx->ctx_lock);
 
 	kiocb = lookup_kiocb(ctx, iocb, key);
-	if (kiocb)
-		ret = kiocb_cancel(ctx, kiocb);
-	else
-		ret = -EINVAL;
-
-	spin_unlock_irq(&ctx->ctx_lock);
-
-	if (!ret) {
+	if (kiocb) {
+		kiocb_cancel(ctx, kiocb);
 		/*
 		 * The result argument is no longer used - the io_event is
 		 * always delivered via the ring buffer. -EINPROGRESS indicates
@@ -1379,6 +1371,8 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		ret = -EINPROGRESS;
 	}
 
+	spin_unlock_irq(&ctx->ctx_lock);
+
 	put_ioctx(ctx);
 
 	return ret;
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 7340f77..d9686f1 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -65,6 +65,11 @@ struct kiocb {
 	struct eventfd_ctx	*ki_eventfd;
 };
 
+static inline bool kiocb_cancelled(struct kiocb *kiocb)
+{
+	return kiocb->ki_cancel == KIOCB_CANCELLED;
+}
+
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
 {
 	return kiocb->ki_ctx == NULL;
-- 
1.7.12

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/4] direct-io: Set dio->io_error directly
  2013-02-28 20:35 [PATCH 1/4] aio: io_cancel() no longer returns the io_event Kent Overstreet
  2013-02-28 20:35 ` [PATCH 2/4] aio: Allow cancellation without a cancel callback Kent Overstreet
@ 2013-02-28 20:35 ` Kent Overstreet
  2013-02-28 20:35 ` [PATCH 4/4] block: Bio cancellation Kent Overstreet
  2013-02-28 21:40 ` [PATCH 1/4] aio: io_cancel() no longer returns the io_event Benjamin LaHaise
  3 siblings, 0 replies; 6+ messages in thread
From: Kent Overstreet @ 2013-02-28 20:35 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-aio
  Cc: Kent Overstreet, tytso, axboe, zab, bcrl, anatol

The way io errors are returned in the dio code was rather convoluted,
and also meant that the specific error code was lost. We need to return
the actual error so that for cancellation we can pass up -ECANCELED.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 fs/direct-io.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index ef234ee..b054615 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -270,7 +270,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
 	return ret;
 }
 
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static void dio_bio_complete(struct dio *dio, struct bio *bio);
 /*
  * Asynchronous IO callback. 
  */
@@ -328,6 +328,9 @@ void dio_end_io(struct bio *bio, int error)
 {
 	struct dio *dio = bio->bi_private;
 
+	if (error)
+		dio->io_error = error;
+
 	if (dio->is_async)
 		dio_bio_end_aio(bio, error, NULL);
 	else
@@ -440,15 +443,11 @@ static struct bio *dio_await_one(struct dio *dio)
 /*
  * Process one completed BIO.  No locks are held.
  */
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static void dio_bio_complete(struct dio *dio, struct bio *bio)
 {
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec;
 	int page_no;
 
-	if (!uptodate)
-		dio->io_error = -EIO;
-
 	if (dio->is_async && dio->rw == READ) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
@@ -461,7 +460,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 		}
 		bio_put(bio);
 	}
-	return uptodate ? 0 : -EIO;
 }
 
 /*
@@ -488,27 +486,21 @@ static void dio_await_completion(struct dio *dio)
  *
  * This also helps to limit the peak amount of pinned userspace memory.
  */
-static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
+static inline void dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
 {
-	int ret = 0;
-
 	if (sdio->reap_counter++ >= 64) {
 		while (dio->bio_list) {
 			unsigned long flags;
 			struct bio *bio;
-			int ret2;
 
 			spin_lock_irqsave(&dio->bio_lock, flags);
 			bio = dio->bio_list;
 			dio->bio_list = bio->bi_private;
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			ret2 = dio_bio_complete(dio, bio);
-			if (ret == 0)
-				ret = ret2;
+			dio_bio_complete(dio, bio);
 		}
 		sdio->reap_counter = 0;
 	}
-	return ret;
 }
 
 /*
@@ -593,19 +585,20 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
 		sector_t start_sector, struct buffer_head *map_bh)
 {
 	sector_t sector;
-	int ret, nr_pages;
+	int nr_pages;
+
+	dio_bio_reap(dio, sdio);
+
+	if (dio->io_error)
+		return dio->io_error;
 
-	ret = dio_bio_reap(dio, sdio);
-	if (ret)
-		goto out;
 	sector = start_sector << (sdio->blkbits - 9);
 	nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
 	nr_pages = min(nr_pages, BIO_MAX_PAGES);
 	BUG_ON(nr_pages <= 0);
 	dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
 	sdio->boundary = 0;
-out:
-	return ret;
+	return 0;
 }
 
 /*
-- 
1.7.12

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/4] block: Bio cancellation
  2013-02-28 20:35 [PATCH 1/4] aio: io_cancel() no longer returns the io_event Kent Overstreet
  2013-02-28 20:35 ` [PATCH 2/4] aio: Allow cancellation without a cancel callback Kent Overstreet
  2013-02-28 20:35 ` [PATCH 3/4] direct-io: Set dio->io_error directly Kent Overstreet
@ 2013-02-28 20:35 ` Kent Overstreet
  2013-02-28 21:40 ` [PATCH 1/4] aio: io_cancel() no longer returns the io_event Benjamin LaHaise
  3 siblings, 0 replies; 6+ messages in thread
From: Kent Overstreet @ 2013-02-28 20:35 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel, linux-aio
  Cc: Kent Overstreet, tytso, axboe, zab, bcrl, anatol

If a bio is associated with a kiocb, allow it to be cancelled.

This is accomplished by adding a pointer to a kiocb in struct bio, and
when we go to dequeue a request we check if its bio has been cancelled -
if so, we end the request with -ECANCELED.

We don't currently try to cancel bios if IO has already been started -
that'd require a per bio callback function, and a way to find all the
outstanding bios for a given kiocb. Such a mechanism may or may not be
added in the future but this patch tries to start simple.

Currently this can only be triggered with aio and io_cancel(), but the
mechanism can be used for sync io too.

It can also be used for bios created by stacking drivers, and bio clones
in general - when cloning a bio, if the bi_iocb pointer is copied as
well the clone will then be cancellable. bio_clone() could be modified
to do this, but hasn't in this patch because all the bio_clone() users
would need to be audited to make sure that it's safe. We can't blindly
make e.g. raid5 writes cancellable without the knowledge of the md code.

Initial patch by Anatol Pomazau (anatol@google.com).

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 block/blk-core.c          | 14 ++++++++++++++
 fs/direct-io.c            |  1 +
 include/linux/bio.h       |  5 +++++
 include/linux/blk_types.h |  1 +
 4 files changed, 21 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index ba16771..a829670 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1746,6 +1746,11 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	if (bio_cancelled(bio)) {
+		err = -ECANCELED;
+		goto end_io;
+	}
+
 	/*
 	 * Various block parts want %current->io_context and lazy ioc
 	 * allocation ends up trading a lot of pain for a small amount of
@@ -2099,6 +2104,12 @@ struct request *blk_peek_request(struct request_queue *q)
 			trace_block_rq_issue(q, rq);
 		}
 
+		if (rq->bio && !rq->bio->bi_next && bio_cancelled(rq->bio)) {
+			blk_start_request(rq);
+			__blk_end_request_all(rq, -ECANCELED);
+			continue;
+		}
+
 		if (!q->boundary_rq || q->boundary_rq == rq) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = NULL;
@@ -2284,6 +2295,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes,
 		char *error_type;
 
 		switch (error) {
+		case -ECANCELED:
+			goto noerr;
 		case -ENOLINK:
 			error_type = "recoverable transport";
 			break;
@@ -2304,6 +2317,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes,
 				   (unsigned long long)blk_rq_pos(req));
 
 	}
+noerr:
 
 	blk_account_io_completion(req, nr_bytes);
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b054615..671673c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -376,6 +376,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 	unsigned long flags;
 
 	bio->bi_private = dio;
+	bio->bi_iocb = dio->iocb;
 
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	dio->refcount++;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5f5491767..28d5e45 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -71,6 +71,11 @@
 
 void bio_endio_batch(struct bio *bio, int error, struct batch_complete *batch);
 
+static inline bool bio_cancelled(struct bio *bio)
+{
+	return bio->bi_iocb && kiocb_cancelled(bio->bi_iocb);
+}
+
 static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
 	if (bio->bi_vcnt)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d4e7bab..4d08359 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -44,6 +44,7 @@ struct bio {
 						 * top bits priority
 						 */
 
+	struct kiocb		*bi_iocb;
 	short			bi_error;
 	unsigned short		bi_vcnt;	/* how many bio_vec's */
 	unsigned short		bi_idx;		/* current index into bvl_vec */
-- 
1.7.12

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/4] aio: io_cancel() no longer returns the io_event
  2013-02-28 20:35 [PATCH 1/4] aio: io_cancel() no longer returns the io_event Kent Overstreet
                   ` (2 preceding siblings ...)
  2013-02-28 20:35 ` [PATCH 4/4] block: Bio cancellation Kent Overstreet
@ 2013-02-28 21:40 ` Benjamin LaHaise
  3 siblings, 0 replies; 6+ messages in thread
From: Benjamin LaHaise @ 2013-02-28 21:40 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: linux-kernel, linux-fsdevel, linux-aio, tytso, axboe, zab, anatol

On Thu, Feb 28, 2013 at 12:35:38PM -0800, Kent Overstreet wrote:
> Originally, io_event() was documented to return the io_event if
> cancellation succeeded - the io_event wouldn't be delivered via the ring
> buffer like it normally would.
> 
> But this isn't what the implementation was actually doing; the only
> driver implementing cancellation, the usb gadget code, never returned an
> io_event in its cancel function. And aio_complete() was recently changed
> to no longer suppress event delivery if the kiocb had been cancelled.
> 
> This gets rid of the unused io_event argument to kiocb_cancel() and
> kiocb->ki_cancel(), and changes io_cancel() to return -EINPROGRESS if
> kiocb->ki_cancel() returned success.

...

> diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
> index b540a1d..fd1bf4a 100644
> --- a/drivers/usb/gadget/inode.c
> +++ b/drivers/usb/gadget/inode.c
> @@ -524,7 +524,7 @@ struct kiocb_priv {
>  	unsigned		actual;
>  };
>  
> -static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
> +static void ep_aio_cancel(struct kiocb *iocb)
>  {
>  	struct kiocb_priv	*priv = iocb->private;
>  	struct ep_data		*epdata;
> @@ -540,7 +540,6 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
>  	// spin_unlock(&epdata->dev->lock);
>  	local_irq_enable();
>  
> -	aio_put_req(iocb);
>  	return value;
>  }
>  

You seem to have removed the return value from ki_cancel in the gadget code, 
so this won't compile.  I think this is wrong and an unnecessary restriction. 
As such, I'm NAKing this patch.  Please leave the return value from the 
ki_cancel function present so we can know if the cancel actually succeeded.

		-ben


> diff --git a/fs/aio.c b/fs/aio.c
> index a86ffd0..4b9bfb5 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -256,8 +256,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
>  }
>  EXPORT_SYMBOL(kiocb_set_cancel_fn);
>  
> -static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
> -			struct io_event *res)
> +static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
>  {
>  	kiocb_cancel_fn *old, *cancel;
>  	int ret = -EINVAL;
> @@ -279,12 +278,10 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
>  	atomic_inc(&kiocb->ki_users);
>  	spin_unlock_irq(&ctx->ctx_lock);
>  
> -	memset(res, 0, sizeof(*res));
> -	res->obj = (u64)(unsigned long)kiocb->ki_obj.user;
> -	res->data = kiocb->ki_user_data;
> -	ret = cancel(kiocb, res);
> +	ret = cancel(kiocb);
>  
>  	spin_lock_irq(&ctx->ctx_lock);
> +	aio_put_req(kiocb);
>  
>  	return ret;
>  }
> @@ -305,7 +302,6 @@ static void free_ioctx_rcu(struct rcu_head *head)
>  static void free_ioctx(struct kioctx *ctx)
>  {
>  	struct aio_ring *ring;
> -	struct io_event res;
>  	struct kiocb *req;
>  	unsigned cpu, head, avail;
>  
> @@ -316,7 +312,7 @@ static void free_ioctx(struct kioctx *ctx)
>  				       struct kiocb, ki_list);
>  
>  		list_del_init(&req->ki_list);
> -		kiocb_cancel(ctx, req, &res);
> +		kiocb_cancel(ctx, req);
>  	}
>  
>  	spin_unlock_irq(&ctx->ctx_lock);
> @@ -1351,7 +1347,6 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
>  SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>  		struct io_event __user *, result)
>  {
> -	struct io_event res;
>  	struct kioctx *ctx;
>  	struct kiocb *kiocb;
>  	u32 key;
> @@ -1369,18 +1364,19 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>  
>  	kiocb = lookup_kiocb(ctx, iocb, key);
>  	if (kiocb)
> -		ret = kiocb_cancel(ctx, kiocb, &res);
> +		ret = kiocb_cancel(ctx, kiocb);
>  	else
>  		ret = -EINVAL;
>  
>  	spin_unlock_irq(&ctx->ctx_lock);
>  
>  	if (!ret) {
> -		/* Cancellation succeeded -- copy the result
> -		 * into the user's buffer.
> +		/*
> +		 * The result argument is no longer used - the io_event is
> +		 * always delivered via the ring buffer. -EINPROGRESS indicates
> +		 * cancellation is progress:
>  		 */
> -		if (copy_to_user(result, &res, sizeof(res)))
> -			ret = -EFAULT;
> +		ret = -EINPROGRESS;
>  	}
>  
>  	put_ioctx(ctx);
> diff --git a/include/linux/aio.h b/include/linux/aio.h
> index 93892bf..7340f77 100644
> --- a/include/linux/aio.h
> +++ b/include/linux/aio.h
> @@ -28,7 +28,7 @@ struct batch_complete;
>   */
>  #define KIOCB_CANCELLED		((void *) (~0ULL))
>  
> -typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *);
> +typedef int (kiocb_cancel_fn)(struct kiocb *);
>  
>  struct kiocb {
>  	struct rb_node		ki_node;
> -- 
> 1.7.12

-- 
"Thought is the essence of where you are now."

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/4] aio: Allow cancellation without a cancel callback
  2013-02-28 20:35 ` [PATCH 2/4] aio: Allow cancellation without a cancel callback Kent Overstreet
@ 2013-02-28 21:53   ` Benjamin LaHaise
  0 siblings, 0 replies; 6+ messages in thread
From: Benjamin LaHaise @ 2013-02-28 21:53 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: linux-kernel, linux-fsdevel, linux-aio, tytso, axboe, zab, anatol

On Thu, Feb 28, 2013 at 12:35:39PM -0800, Kent Overstreet wrote:
> Prep work for bio cancellation. At least initially, we don't want to
> implement a callback that has to chase down all the state (multiple
> bios/requests) associated with the iocb; a simple flag will suffice.

I don't think you want to force mandatory addition to the cancel list.  This 
essentially regresses part of the optimization work you put into place with 
percpu reference counts and the rest of the cleanups you put into place for 
the aio core and direct i/o.  I think we should find a better way of doing 
this to keep your performance optimizations in as good a state as possible.

This patch also makes cancellation falsely return success for kiocbs that do 
not have any actual support for cancellation.  I think this is incorrect, 
and have to NAK this part of the patch as a result.

		-ben
> 
> Signed-off-by: Kent Overstreet <koverstreet@google.com>
> ---
>  drivers/usb/gadget/inode.c |  7 +----
>  fs/aio.c                   | 76 +++++++++++++++++++++-------------------------
>  include/linux/aio.h        |  5 +++
>  3 files changed, 41 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
> index fd1bf4a..e5e2210 100644
> --- a/drivers/usb/gadget/inode.c
> +++ b/drivers/usb/gadget/inode.c
> @@ -528,19 +528,14 @@ static void ep_aio_cancel(struct kiocb *iocb)
>  {
>  	struct kiocb_priv	*priv = iocb->private;
>  	struct ep_data		*epdata;
> -	int			value;
>  
>  	local_irq_disable();
>  	epdata = priv->epdata;
>  	// spin_lock(&epdata->dev->lock);
>  	if (likely(epdata && epdata->ep && priv->req))
> -		value = usb_ep_dequeue (epdata->ep, priv->req);
> -	else
> -		value = -EINVAL;
> +		usb_ep_dequeue (epdata->ep, priv->req);
>  	// spin_unlock(&epdata->dev->lock);
>  	local_irq_enable();
> -
> -	return value;
>  }
>  
>  static ssize_t ep_copy_to_user(struct kiocb_priv *priv)
> diff --git a/fs/aio.c b/fs/aio.c
> index 4b9bfb5..f5f27bd 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -242,48 +242,34 @@ static int aio_setup_ring(struct kioctx *ctx)
>  
>  void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
>  {
> -	struct kioctx *ctx = req->ki_ctx;
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&ctx->ctx_lock, flags);
> +	kiocb_cancel_fn *p, *old = req->ki_cancel;
>  
> -	if (!req->ki_list.next)
> -		list_add(&req->ki_list, &ctx->active_reqs);
> -
> -	req->ki_cancel = cancel;
> +	do {
> +		if (old == KIOCB_CANCELLED) {
> +			cancel(req);
> +			return;
> +		}
>  
> -	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
> +		p = old;
> +		old = cmpxchg(&req->ki_cancel, old, cancel);
> +	} while (old != p);
>  }
>  EXPORT_SYMBOL(kiocb_set_cancel_fn);
>  
> -static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
> +static void kiocb_cancel(struct kioctx *ctx, struct kiocb *req)
>  {
> -	kiocb_cancel_fn *old, *cancel;
> -	int ret = -EINVAL;
> -
> -	/*
> -	 * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
> -	 * actually has a cancel function, hence the cmpxchg()
> -	 */
> -
> -	cancel = ACCESS_ONCE(kiocb->ki_cancel);
> -	do {
> -		if (!cancel || cancel == KIOCB_CANCELLED)
> -			return ret;
> -
> -		old = cancel;
> -		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
> -	} while (cancel != old);
> -
> -	atomic_inc(&kiocb->ki_users);
> -	spin_unlock_irq(&ctx->ctx_lock);
> +	kiocb_cancel_fn *cancel;
>  
> -	ret = cancel(kiocb);
> +	cancel = xchg(&req->ki_cancel, KIOCB_CANCELLED);
> +	if (cancel && cancel != KIOCB_CANCELLED) {
> +		atomic_inc(&req->ki_users);
> +		spin_unlock_irq(&ctx->ctx_lock);
>  
> -	spin_lock_irq(&ctx->ctx_lock);
> -	aio_put_req(kiocb);
> +		cancel(req);
>  
> -	return ret;
> +		spin_lock_irq(&ctx->ctx_lock);
> +		aio_put_req(req);
> +	}
>  }
>  
>  static void free_ioctx_rcu(struct rcu_head *head)
> @@ -1126,6 +1112,10 @@ rw_common:
>  
>  		req->ki_nbytes = ret;
>  
> +		spin_lock_irq(&req->ki_ctx->ctx_lock);
> +		list_add(&req->ki_list, &req->ki_ctx->active_reqs);
> +		spin_unlock_irq(&req->ki_ctx->ctx_lock);
> +
>  		/* XXX: move/kill - rw_verify_area()? */
>  		/* This matches the pread()/pwrite() logic */
>  		if (req->ki_pos < 0) {
> @@ -1141,6 +1131,10 @@ rw_common:
>  		if (!file->f_op->aio_fsync)
>  			return -EINVAL;
>  
> +		spin_lock_irq(&req->ki_ctx->ctx_lock);
> +		list_add(&req->ki_list, &req->ki_ctx->active_reqs);
> +		spin_unlock_irq(&req->ki_ctx->ctx_lock);
> +
>  		ret = file->f_op->aio_fsync(req, 1);
>  		break;
>  
> @@ -1148,6 +1142,10 @@ rw_common:
>  		if (!file->f_op->aio_fsync)
>  			return -EINVAL;
>  
> +		spin_lock_irq(&req->ki_ctx->ctx_lock);
> +		list_add(&req->ki_list, &req->ki_ctx->active_reqs);
> +		spin_unlock_irq(&req->ki_ctx->ctx_lock);
> +
>  		ret = file->f_op->aio_fsync(req, 0);
>  		break;
>  
> @@ -1363,14 +1361,8 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>  	spin_lock_irq(&ctx->ctx_lock);
>  
>  	kiocb = lookup_kiocb(ctx, iocb, key);
> -	if (kiocb)
> -		ret = kiocb_cancel(ctx, kiocb);
> -	else
> -		ret = -EINVAL;
> -
> -	spin_unlock_irq(&ctx->ctx_lock);
> -
> -	if (!ret) {
> +	if (kiocb) {
> +		kiocb_cancel(ctx, kiocb);
>  		/*
>  		 * The result argument is no longer used - the io_event is
>  		 * always delivered via the ring buffer. -EINPROGRESS indicates
> @@ -1379,6 +1371,8 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>  		ret = -EINPROGRESS;
>  	}
>  
> +	spin_unlock_irq(&ctx->ctx_lock);
> +
>  	put_ioctx(ctx);
>  
>  	return ret;
> diff --git a/include/linux/aio.h b/include/linux/aio.h
> index 7340f77..d9686f1 100644
> --- a/include/linux/aio.h
> +++ b/include/linux/aio.h
> @@ -65,6 +65,11 @@ struct kiocb {
>  	struct eventfd_ctx	*ki_eventfd;
>  };
>  
> +static inline bool kiocb_cancelled(struct kiocb *kiocb)
> +{
> +	return kiocb->ki_cancel == KIOCB_CANCELLED;
> +}
> +
>  static inline bool is_sync_kiocb(struct kiocb *kiocb)
>  {
>  	return kiocb->ki_ctx == NULL;
> -- 
> 1.7.12

-- 
"Thought is the essence of where you are now."

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-02-28 21:53 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-02-28 20:35 [PATCH 1/4] aio: io_cancel() no longer returns the io_event Kent Overstreet
2013-02-28 20:35 ` [PATCH 2/4] aio: Allow cancellation without a cancel callback Kent Overstreet
2013-02-28 21:53   ` Benjamin LaHaise
2013-02-28 20:35 ` [PATCH 3/4] direct-io: Set dio->io_error directly Kent Overstreet
2013-02-28 20:35 ` [PATCH 4/4] block: Bio cancellation Kent Overstreet
2013-02-28 21:40 ` [PATCH 1/4] aio: io_cancel() no longer returns the io_event Benjamin LaHaise

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).