All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] block: avoid extra bio reference for async O_DIRECT
@ 2018-11-29 22:55 Jens Axboe
  2018-11-29 22:58 ` Jens Axboe
  0 siblings, 1 reply; 4+ messages in thread
From: Jens Axboe @ 2018-11-29 22:55 UTC (permalink / raw)
  To: linux-block@vger.kernel.org; +Cc: Christoph Hellwig

The bio referencing has a trick that doesn't do any actual atomic
inc/dec on the reference count until we have to elevator to > 1. For the
async IO O_DIRECT case, we can't use the simple DIO variants, so we use
__blkdev_direct_IO(). It always grabs an extra reference to the bio
after allocation, which means we then enter the slower path of actually
having to do atomic_inc/dec on the count.

We don't need to do that for the async case, unless we end up going
multi-bio, in which case we're already doing huge amounts of IO. For the
smaller IO case (< BIO_MAX_PAGES), we can do without the extra ref.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

diff --git a/fs/block_dev.c b/fs/block_dev.c
index f3bd41f80421..44fc9da13f68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -292,10 +292,26 @@ static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
 	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
 }
 
+static void blkdev_bio_dirty_release(struct bio *bio, bool should_dirty)
+{
+	if (should_dirty) {
+		bio_check_pages_dirty(bio);
+	} else {
+		struct bio_vec *bvec;
+		int i;
+
+		if (!bio_flagged(bio, BIO_HOLD_PAGES))
+			bio_for_each_segment_all(bvec, bio, i)
+				put_page(bvec->bv_page);
+		bio_put(bio);
+	}
+}
+
 static void blkdev_bio_end_io(struct bio *bio)
 {
 	struct blkdev_dio *dio = bio->bi_private;
 	bool should_dirty = dio->should_dirty;
+	bool should_free = false;
 
 	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
 		if (bio->bi_status && !dio->bio.bi_status)
@@ -313,26 +329,20 @@ static void blkdev_bio_end_io(struct bio *bio)
 			}
 
 			dio->iocb->ki_complete(iocb, ret, 0);
-			bio_put(&dio->bio);
 		} else {
 			struct task_struct *waiter = dio->waiter;
 
 			WRITE_ONCE(dio->waiter, NULL);
 			blk_wake_io_task(waiter);
 		}
+		/* last bio is done, now we can free the parent holding dio */
+		should_free = dio->multi_bio;
 	}
 
-	if (should_dirty) {
-		bio_check_pages_dirty(bio);
-	} else {
-		struct bio_vec *bvec;
-		int i;
-
-		if (!bio_flagged(bio, BIO_HOLD_PAGES))
-			bio_for_each_segment_all(bvec, bio, i)
-				put_page(bvec->bv_page);
-		bio_put(bio);
-	}
+	if (!dio->is_sync)
+		blkdev_bio_dirty_release(bio, should_dirty);
+	if (should_free)
+		bio_put(&dio->bio);
 }
 
 static ssize_t
@@ -355,14 +365,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		return -EINVAL;
 
 	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
-	bio_get(bio); /* extra ref for the completion handler */
 
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
-	if (dio->is_sync)
+	if (dio->is_sync) {
 		dio->waiter = current;
-	else
+		bio_get(bio);
+	} else {
 		dio->iocb = iocb;
+	}
 
 	dio->size = 0;
 	dio->multi_bio = false;
@@ -425,6 +436,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		}
 
 		if (!dio->multi_bio) {
+			/*
+			 * async needs an extra reference if we are goign
+			 * multi-bio only, so we can ensure that 'dio'
+			 * sticks around.
+			 */
+			if (!is_sync)
+				bio_get(bio);
 			dio->multi_bio = true;
 			atomic_set(&dio->ref, 2);
 		} else {
@@ -458,6 +476,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	if (likely(!ret))
 		ret = dio->size;
 
+	blkdev_bio_dirty_release(bio, dio->should_dirty);
 	bio_put(&dio->bio);
 	return ret;
 }

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-11-30 15:16 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-11-29 22:55 [PATCH] block: avoid extra bio reference for async O_DIRECT Jens Axboe
2018-11-29 22:58 ` Jens Axboe
2018-11-30  8:38   ` Christoph Hellwig
2018-11-30 15:16     ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.