linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.com>
To: Ming Lei <tom.leiming@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>,
	Jens Axboe <axboe@kernel.dk>,
	linux-block <linux-block@vger.kernel.org>,
	"open list:SOFTWARE RAID (Multiple Disks) SUPPORT"
	<linux-raid@vger.kernel.org>,
	"open list:DEVICE-MAPPER (LVM)" <dm-devel@redhat.com>,
	Alasdair Kergon <agk@redhat.com>,
	Mike Snitzer <snitzer@redhat.com>, Shaohua Li <shli@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	"Martin K . Petersen" <martin.petersen@oracle.com>
Subject: Re: [PATCH v3] block: trace completion of all bios.
Date: Mon, 27 Mar 2017 10:17:03 +1100	[thread overview]
Message-ID: <87inmv8w7k.fsf@notabene.neil.brown.name> (raw)
In-Reply-To: <CACVXFVNC=-_f1jYo5bYw7X6TT8-g7UThw3zzzfrMaKVarnJvSQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 4825 bytes --]

On Fri, Mar 24 2017, Ming Lei wrote:

> On Fri, Mar 24, 2017 at 8:07 AM, NeilBrown <neilb@suse.com> wrote:
...
>> @@ -102,6 +102,8 @@ struct bio {
>>  #define BIO_REFFED     8       /* bio has elevated ->bi_cnt */
>>  #define BIO_THROTTLED  9       /* This bio has already been subjected to
>>                                  * throttling rules. Don't do it again. */
>> +#define BIO_TRACE_COMPLETION 10        /* bio_endio() should trace the final completion
>> +                                * of this bio. */
>
> This may not be a good idea, since the flag space is quite small(12).

which means there are 2 unused bits and I only want to use 1.  It should
fit.
I'm a bit perplexed why 4 bits a reserved for the BVEC_POOL number which
ranges from 0 to 7....

But if these bits really are a scarce resource, we should stop wasting
them.
The patch below makes bit 7 (BIO_CHAIN) available.  We could probably make
bit 8 (BIO_REFFED) available using a similar technique.
BIO_QUIET is only relevant if bi_error is non-zero and bi_error has a
limited range, so a bit in there could be used instead. In fact,
MAX_ERRNO is 4096, so bi_error could be a 'short'.  That could save 2
whole bytes ... or make 16 more flag bits available.

So I don't think you concern about a shortage of spare flag bits is valid.

Thanks,
NeilBrown

diff --git a/block/bio.c b/block/bio.c
index c1272986133e..1703786a206a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -274,7 +274,7 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 	      unsigned short max_vecs)
 {
 	memset(bio, 0, sizeof(*bio));
-	atomic_set(&bio->__bi_remaining, 1);
+	atomic_set(&bio->__bi_remaining, 0);
 	atomic_set(&bio->__bi_cnt, 1);
 
 	bio->bi_io_vec = table;
@@ -300,7 +300,7 @@ void bio_reset(struct bio *bio)
 
 	memset(bio, 0, BIO_RESET_BYTES);
 	bio->bi_flags = flags;
-	atomic_set(&bio->__bi_remaining, 1);
+	atomic_set(&bio->__bi_remaining, 0);
 }
 EXPORT_SYMBOL(bio_reset);
 
@@ -1794,20 +1794,15 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
 static inline bool bio_remaining_done(struct bio *bio)
 {
 	/*
-	 * If we're not chaining, then ->__bi_remaining is always 1 and
+	 * If we're not chaining, then ->__bi_remaining is always 0 and
 	 * we always end io on the first invocation.
 	 */
-	if (!bio_flagged(bio, BIO_CHAIN))
+	if (atomic_read(&bio->__bi_remaining) == 0)
 		return true;
 
 	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
 
-	if (atomic_dec_and_test(&bio->__bi_remaining)) {
-		bio_clear_flag(bio, BIO_CHAIN);
-		return true;
-	}
-
-	return false;
+	return atomic_dec_and_test(&bio->__bi_remaining);
 }
 
 /**
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8e521194f6fc..d15245544111 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -664,13 +664,19 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
 }
 
 /*
- * Increment chain count for the bio. Make sure the CHAIN flag update
- * is visible before the raised count.
+ * Increment chain count for the bio.
  */
 static inline void bio_inc_remaining(struct bio *bio)
 {
-	bio_set_flag(bio, BIO_CHAIN);
-	smp_mb__before_atomic();
+	/*
+	 * Calls to bio_inc_remaining() cannot race
+	 * with the final call to bio_end_io(), and
+	 * the first call cannot race with other calls,
+	 * so if __bi_remaining appears to be zero, there
+	 * can be no race which might change it.
+	 */
+	if (atomic_read(&bio->__bi_remaining) == 0)
+		atomic_set(&bio->__bi_remaining, 1);
 	atomic_inc(&bio->__bi_remaining);
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index db7a57ee0e58..2deea4501d14 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -46,6 +46,15 @@ struct bio {
 	unsigned int		bi_seg_front_size;
 	unsigned int		bi_seg_back_size;
 
+	/* __bi_remaining records the number of completion events
+	 * (i.e. calls to bi_end_io()) that need to happen before this
+	 * bio is truly complete.
+	 * A value of '0' means that there will only ever be one completion
+	 * event, so there will be no racing and no need for an atomic operation
+	 * to detect the last event.
+	 * Any other value represents a simple count subject to atomic_inc() and
+	 * atomic_dec_and_test().
+	 */
 	atomic_t		__bi_remaining;
 
 	bio_end_io_t		*bi_end_io;
@@ -98,7 +107,7 @@ struct bio {
 #define BIO_USER_MAPPED 4	/* contains user pages */
 #define BIO_NULL_MAPPED 5	/* contains invalid user pages */
 #define BIO_QUIET	6	/* Make BIO Quiet */
-#define BIO_CHAIN	7	/* chained bio, ->bi_remaining in effect */
+/* 7 unused */
 #define BIO_REFFED	8	/* bio has elevated ->bi_cnt */
 #define BIO_THROTTLED	9	/* This bio has already been subjected to
 				 * throttling rules. Don't do it again. */

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

  reply	other threads:[~2017-03-26 23:17 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-22  2:38 [PATCH] block: trace completion of all bios NeilBrown
2017-03-22 12:51 ` Christoph Hellwig
2017-03-23  6:26   ` NeilBrown
2017-03-23  6:29   ` [PATCH v2] " NeilBrown
2017-03-23 10:43     ` Ming Lei
2017-03-24  0:06       ` NeilBrown
2017-03-24  0:07       ` [PATCH v3] " NeilBrown
2017-03-24  6:47         ` Ming Lei
2017-03-26 23:17           ` NeilBrown [this message]
2017-03-27  9:03         ` Christoph Hellwig
2017-03-27  9:49           ` NeilBrown
2017-03-27 17:14             ` Christoph Hellwig
2017-03-27 23:42               ` [dm-devel] " NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87inmv8w7k.fsf@notabene.neil.brown.name \
    --to=neilb@suse.com \
    --cc=agk@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=dm-devel@redhat.com \
    --cc=hch@infradead.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=shli@kernel.org \
    --cc=snitzer@redhat.com \
    --cc=tom.leiming@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).