linux-security-module.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: John Garry <john.g.garry@oracle.com>
To: axboe@kernel.dk, kbusch@kernel.org, hch@lst.de, sagi@grimberg.me,
	martin.petersen@oracle.com, djwong@kernel.org,
	viro@zeniv.linux.org.uk, brauner@kernel.org, dchinner@redhat.com,
	jejb@linux.ibm.com
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-nvme@lists.infradead.org, linux-scsi@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-security-module@vger.kernel.org, paul@paul-moore.com,
	jmorris@namei.org, serge@hallyn.com,
	John Garry <john.g.garry@oracle.com>
Subject: [PATCH RFC 08/16] block: Add support for atomic_write_unit
Date: Wed,  3 May 2023 18:38:13 +0000	[thread overview]
Message-ID: <20230503183821.1473305-9-john.g.garry@oracle.com> (raw)
In-Reply-To: <20230503183821.1473305-1-john.g.garry@oracle.com>

Add bio.atomic_write_unit, which is the min size which we can split a bio.
Any bio needs to be split in a multiple of this size and also aligned to
this size.

In __bio_iov_iter_get_pages(), use atomic_write_unit to trim a bio to
be a multiple of atomic_write_unit.

In bio_split_rw(), we need to consider splitting as follows:
- For a regular split which does not cross an atomic write boundary, same
  as in __bio_iov_iter_get_pages(), trim to be a multiple of
  atomic_write_unit
- We also need to check for when a bio straddles an atomic write boundary.
  In this case, split to be start/end-aligned with the boundary.

We need to ignore lim->max_sectors since to may be less than
bio->write_atomic_unit, which we cannot tolerate.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 block/bio.c               |  7 +++-
 block/blk-merge.c         | 84 ++++++++++++++++++++++++++++++++++-----
 include/linux/blk_types.h |  2 +
 3 files changed, 81 insertions(+), 12 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index fd11614bba4d..fc2f29e1c14c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -247,6 +247,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
 	      unsigned short max_vecs, blk_opf_t opf)
 {
 	bio->bi_next = NULL;
+	bio->atomic_write_unit = 0;
 	bio->bi_bdev = bdev;
 	bio->bi_opf = opf;
 	bio->bi_flags = 0;
@@ -815,6 +816,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_iter = bio_src->bi_iter;
 
+	bio->atomic_write_unit = bio_src->atomic_write_unit;
 	if (bio->bi_bdev) {
 		if (bio->bi_bdev == bio_src->bi_bdev &&
 		    bio_flagged(bio_src, BIO_REMAPPED))
@@ -1273,7 +1275,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 
 	nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
 
-	trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
+	if (bio->atomic_write_unit)
+		trim = size & (bio->atomic_write_unit - 1);
+	else
+		trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
 	iov_iter_revert(iter, trim);
 
 	size -= trim;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 6460abdb2426..95ab6b644955 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -171,7 +171,17 @@ static inline unsigned get_max_io_size(struct bio *bio,
 {
 	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
 	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
-	unsigned max_sectors = lim->max_sectors, start, end;
+	unsigned max_sectors, start, end;
+
+	/*
+	 * We ignore lim->max_sectors for atomic writes simply because
+	 * it may less than bio->write_atomic_unit, which we cannot
+	 * tolerate.
+	 */
+	if (bio->bi_opf & REQ_ATOMIC)
+		max_sectors = lim->atomic_write_max_bytes >> SECTOR_SHIFT;
+	else
+		max_sectors = lim->max_sectors;
 
 	if (lim->chunk_sectors) {
 		max_sectors = min(max_sectors,
@@ -256,6 +266,22 @@ static bool bvec_split_segs(const struct queue_limits *lim,
 	return len > 0 || bv->bv_len > max_len;
 }
 
+static bool bio_straddles_boundary(struct bio *bio, unsigned int bytes,
+				   unsigned int boundary)
+{
+	loff_t start = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+	loff_t end = start + bytes;
+	loff_t start_mod = start % boundary;
+	loff_t end_mod = end % boundary;
+
+	if (end - start > boundary)
+		return true;
+	if ((start_mod > end_mod) && (start_mod && end_mod))
+		return true;
+
+	return false;
+}
+
 /**
  * bio_split_rw - split a bio in two bios
  * @bio:  [in] bio to be split
@@ -276,10 +302,15 @@ static bool bvec_split_segs(const struct queue_limits *lim,
  * responsible for ensuring that @bs is only destroyed after processing of the
  * split bio has finished.
  */
+
+
 struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
 		unsigned *segs, struct bio_set *bs, unsigned max_bytes)
 {
+	unsigned int atomic_write_boundary = lim->atomic_write_boundary;
+	bool atomic_write = bio->bi_opf & REQ_ATOMIC;
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
+	bool straddles_boundary = false;
 	struct bvec_iter iter;
 	unsigned nsegs = 0, bytes = 0;
 
@@ -291,14 +322,31 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
 		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
 			goto split;
 
+		if (atomic_write && atomic_write_boundary) {
+			straddles_boundary = bio_straddles_boundary(bio,
+					bytes + bv.bv_len, atomic_write_boundary);
+		}
 		if (nsegs < lim->max_segments &&
 		    bytes + bv.bv_len <= max_bytes &&
-		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
+		    bv.bv_offset + bv.bv_len <= PAGE_SIZE &&
+		    !straddles_boundary) {
 			nsegs++;
 			bytes += bv.bv_len;
 		} else {
-			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
-					lim->max_segments, max_bytes))
+			bool split_the_segs =
+				bvec_split_segs(lim, &bv, &nsegs, &bytes,
+						lim->max_segments, max_bytes);
+
+			/*
+			 * We may not actually straddle the boundary as we may
+			 * have added less bytes than anticipated
+			 */
+			if (straddles_boundary) {
+				straddles_boundary = bio_straddles_boundary(bio,
+						bytes, atomic_write_boundary);
+			}
+
+			if (split_the_segs || straddles_boundary)
 				goto split;
 		}
 
@@ -321,12 +369,25 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
 
 	*segs = nsegs;
 
-	/*
-	 * Individual bvecs might not be logical block aligned. Round down the
-	 * split size so that each bio is properly block size aligned, even if
-	 * we do not use the full hardware limits.
-	 */
-	bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
+	if (straddles_boundary) {
+		loff_t new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes;
+		unsigned int trim = new_end & (atomic_write_boundary - 1);
+		bytes -= trim;
+		new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes;
+		BUG_ON(new_end % atomic_write_boundary);
+	} else if (bio->atomic_write_unit) {
+		unsigned int atomic_write_unit = bio->atomic_write_unit;
+		unsigned int trim = bytes % atomic_write_unit;
+
+		bytes -= trim;
+	} else {
+		/*
+		 * Individual bvecs might not be logical block aligned. Round down the
+		 * split size so that each bio is properly block size aligned, even if
+		 * we do not use the full hardware limits.
+		 */
+		bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
+	}
 
 	/*
 	 * Bio splitting may cause subtle trouble such as hang when doing sync
@@ -355,7 +416,8 @@ struct bio *__bio_split_to_limits(struct bio *bio,
 				  const struct queue_limits *lim,
 				  unsigned int *nr_segs)
 {
-	struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
+	struct block_device *bi_bdev = bio->bi_bdev;
+	struct bio_set *bs = &bi_bdev->bd_disk->bio_split;
 	struct bio *split;
 
 	switch (bio_op(bio)) {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 347b52e00322..daa44eac9f14 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -303,6 +303,8 @@ struct bio {
 
 	struct bio_set		*bi_pool;
 
+	unsigned int atomic_write_unit;
+
 	/*
 	 * We can inline a number of vecs at the end of the bio, to avoid
 	 * double allocations for a small number of bio_vecs. This member
-- 
2.31.1


  parent reply	other threads:[~2023-05-03 18:40 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-03 18:38 [PATCH RFC 00/16] block atomic writes John Garry
2023-05-03 18:38 ` [PATCH RFC 01/16] block: Add atomic write operations to request_queue limits John Garry
2023-05-03 21:39   ` Dave Chinner
2023-05-04 18:14     ` John Garry
2023-05-04 22:26       ` Dave Chinner
2023-05-05  7:54         ` John Garry
2023-05-05 22:00           ` Darrick J. Wong
2023-05-07  1:59             ` Martin K. Petersen
2023-05-05 23:18           ` Dave Chinner
2023-05-06  9:38             ` John Garry
2023-05-07  2:35             ` Martin K. Petersen
2023-05-05 22:47         ` Eric Biggers
2023-05-05 23:31           ` Dave Chinner
2023-05-06  0:08             ` Eric Biggers
2023-05-09  0:19   ` Mike Snitzer
2023-05-17 17:02     ` John Garry
2023-05-03 18:38 ` [PATCH RFC 02/16] fs/bdev: Add atomic write support info to statx John Garry
2023-05-03 21:58   ` Dave Chinner
2023-05-04  8:45     ` John Garry
2023-05-04 22:40       ` Dave Chinner
2023-05-05  8:01         ` John Garry
2023-05-05 22:04           ` Darrick J. Wong
2023-05-03 18:38 ` [PATCH RFC 03/16] xfs: Support atomic write for statx John Garry
2023-05-03 22:17   ` Dave Chinner
2023-05-05 22:10     ` Darrick J. Wong
2023-05-03 18:38 ` [PATCH RFC 04/16] fs: Add RWF_ATOMIC and IOCB_ATOMIC flags for atomic write support John Garry
2023-05-03 18:38 ` [PATCH RFC 05/16] block: Add REQ_ATOMIC flag John Garry
2023-05-03 18:38 ` [PATCH RFC 06/16] block: Limit atomic writes according to bio and queue limits John Garry
2023-05-03 18:53   ` Keith Busch
2023-05-04  8:24     ` John Garry
2023-05-03 18:38 ` [PATCH RFC 07/16] block: Add bdev_find_max_atomic_write_alignment() John Garry
2023-05-03 18:38 ` John Garry [this message]
2023-05-03 18:38 ` [PATCH RFC 09/16] block: Add blk_validate_atomic_write_op() John Garry
2023-05-03 18:38 ` [PATCH RFC 10/16] block: Add fops atomic write support John Garry
2023-05-03 18:38 ` [PATCH RFC 11/16] fs: iomap: Atomic " John Garry
2023-05-04  5:00   ` Dave Chinner
2023-05-05 21:19     ` Darrick J. Wong
2023-05-05 23:56       ` Dave Chinner
2023-05-03 18:38 ` [PATCH RFC 12/16] xfs: Add support for fallocate2 John Garry
2023-05-03 23:26   ` Dave Chinner
2023-05-05 22:23     ` Darrick J. Wong
2023-05-05 23:42       ` Dave Chinner
2023-05-03 18:38 ` [PATCH RFC 13/16] scsi: sd: Support reading atomic properties from block limits VPD John Garry
2023-05-03 18:38 ` [PATCH RFC 14/16] scsi: sd: Add WRITE_ATOMIC_16 support John Garry
2023-05-03 18:48   ` Bart Van Assche
2023-05-04  8:17     ` John Garry
2023-05-03 18:38 ` [PATCH RFC 15/16] scsi: scsi_debug: Atomic write support John Garry
2023-05-03 18:38 ` [PATCH RFC 16/16] nvme: Support atomic writes John Garry
2023-05-03 18:49   ` Bart Van Assche
2023-05-04  8:19     ` John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230503183821.1473305-9-john.g.garry@oracle.com \
    --to=john.g.garry@oracle.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=dchinner@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=jejb@linux.ibm.com \
    --cc=jmorris@namei.org \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=paul@paul-moore.com \
    --cc=sagi@grimberg.me \
    --cc=serge@hallyn.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).