Distributed Replicated Block Device (DRBD) development
 help / color / mirror / Atom feed
* [Drbd-dev] [PATCH 0/3] 5.18 kernel patches w/out compat
@ 2022-09-08 21:13 Michael D Labriola
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support Michael D Labriola
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Michael D Labriola @ 2022-09-08 21:13 UTC (permalink / raw)
  To: drbd-dev

Ahoy!  I put together the following patches to get things compiling on
5.18 a while back and never got a chance to do anything with them.
Now 5.19 is stable, 5.18 is EOL, and 6.0 is coming soon.  Where has
the time gone.

I just rebased on drbd-9.1 today, everything compiles against 5.18,
and my resources at least come up and look ok w/out doing any
strenuous testing.  Can somebody take a look at these and make sure
I'm not way off course before I go gin up compat patches to go along
with these?

Michael D Labriola (3):
      drbd: remove WRITE_SAME support
      drbd: remove reliance on bdi congestion
      drbd: use bio_alloc_clone() instead of bio_clone_fast()

 drbd/drbd_main.c          | 35 +++++++----------------------------
 drbd/drbd_nl.c            | 75 +++------------------------------------------------------------------------
 drbd/drbd_receiver.c      | 38 +++++++-------------------------------
 drbd/drbd_req.c           |  6 ++----
 drbd/drbd_sender.c        |  4 ----
 drbd/drbd_transport_tcp.c |  3 ---
 6 files changed, 19 insertions(+), 142 deletions(-)


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support
  2022-09-08 21:13 [Drbd-dev] [PATCH 0/3] 5.18 kernel patches w/out compat Michael D Labriola
@ 2022-09-08 21:13 ` Michael D Labriola
  2022-09-09  7:57   ` Joel Colledge
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 2/3] drbd: remove reliance on bdi congestion Michael D Labriola
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast() Michael D Labriola
  2 siblings, 1 reply; 10+ messages in thread
From: Michael D Labriola @ 2022-09-08 21:13 UTC (permalink / raw)
  To: drbd-dev

This commit mimics upstream commit a34592ff6b78, which removes all the
WRITE_SAME code because "REQ_OP_WRITE_SAME was only ever submitted by
the legacy Linux zeroing code, which has switched to use
REQ_OP_WRITE_ZEROES long ago."  WRITE_SAME was then removed from Linux
5.18.

Signed-off-by: Michael D Labriola <veggiemike@sourceruckus.org>
---
 drbd/drbd_main.c          | 35 ++++----------------
 drbd/drbd_nl.c            | 70 ++-------------------------------------
 drbd/drbd_receiver.c      | 38 ++++-----------------
 drbd/drbd_req.c           |  1 -
 drbd/drbd_sender.c        |  4 ---
 drbd/drbd_transport_tcp.c |  3 --
 6 files changed, 16 insertions(+), 135 deletions(-)

diff --git a/drbd/drbd_main.c b/drbd/drbd_main.c
index 545d739d..c0b37c9c 100644
--- a/drbd/drbd_main.c
+++ b/drbd/drbd_main.c
@@ -1572,8 +1572,7 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device,
 			queue_discard_zeroes_data(q)
 			/* but that is always false on recent kernels */
 			;
-		p->qlim->write_same_capable = !disable_write_same &&
-			!!q->limits.max_write_same_sectors;
+		p->qlim->write_same_capable = 0;
 		put_ldev(device);
 	} else {
 		struct request_queue *q = device->rq_queue;
@@ -2137,9 +2136,6 @@ static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio)
 					 bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
 		if (err)
 			return err;
-		/* WRITE_SAME has only one segment */
-		if (bio_op(bio) == REQ_OP_WRITE_SAME)
-			break;
 
 		peer_device->send_cnt += bvec.bv_len >> 9;
 	}
@@ -2219,7 +2215,6 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, struct bio *bio
 		return  (bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0) |
 			(bio->bi_opf & REQ_FUA ? DP_FUA : 0) |
 			(bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
-			(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
 			(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
 			(bio_op(bio) == REQ_OP_WRITE_ZEROES ?
 			 ((connection->agreed_features & DRBD_FF_WZEROES) ?
@@ -2241,7 +2236,6 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
 	char *const after = peer_device->connection->scratch_buffer.d.after;
 	struct p_trim *trim = NULL;
 	struct p_data *p;
-	struct p_wsame *wsame = NULL;
 	void *digest_out = NULL;
 	unsigned int dp_flags = 0;
 	int digest_size = 0;
@@ -2259,19 +2253,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
 		if (peer_device->connection->integrity_tfm)
 			digest_size = crypto_shash_digestsize(peer_device->connection->integrity_tfm);
 
-		if (op == REQ_OP_WRITE_SAME) {
-			wsame = drbd_prepare_command(peer_device, sizeof(*wsame) + digest_size, DATA_STREAM);
-			if (!wsame)
-				return -EIO;
-			p = &wsame->p_data;
-			wsame->size = cpu_to_be32(req->i.size);
-			digest_out = wsame + 1;
-		} else {
-			p = drbd_prepare_command(peer_device, sizeof(*p) + digest_size, DATA_STREAM);
-			if (!p)
-				return -EIO;
-			digest_out = p + 1;
-		}
+		p = drbd_prepare_command(peer_device, sizeof(*p) + digest_size, DATA_STREAM);
+		if (!p)
+			return -EIO;
+		digest_out = p + 1;
 	}
 
 	p->sector = cpu_to_be64(req->i.sector);
@@ -2300,14 +2285,8 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
 		memcpy(digest_out, before, digest_size);
 	}
 
-	if (wsame) {
-		additional_size_command(peer_device->connection, DATA_STREAM,
-					bio_iovec(req->master_bio).bv_len);
-		err = __send_command(peer_device->connection, device->vnr, P_WSAME, DATA_STREAM);
-	} else {
-		additional_size_command(peer_device->connection, DATA_STREAM, req->i.size);
-		err = __send_command(peer_device->connection, device->vnr, P_DATA, DATA_STREAM);
-	}
+	additional_size_command(peer_device->connection, DATA_STREAM, req->i.size);
+	err = __send_command(peer_device->connection, device->vnr, P_DATA, DATA_STREAM);
 	if (!err) {
 		/* For protocol A, we have to memcpy the payload into
 		 * socket buffers, as we may complete right away
diff --git a/drbd/drbd_nl.c b/drbd/drbd_nl.c
index 4d7af4f4..79464bb7 100644
--- a/drbd/drbd_nl.c
+++ b/drbd/drbd_nl.c
@@ -1996,69 +1996,6 @@ static void fixup_write_zeroes(struct drbd_device *device, struct request_queue
 		q->limits.max_write_zeroes_sectors = 0;
 }
 
-static void decide_on_write_same_support(struct drbd_device *device,
-			struct request_queue *q,
-			struct request_queue *b, struct o_qlim *o,
-			bool disable_write_same)
-{
-	bool can_do = b ? b->limits.max_write_same_sectors : true;
-
-	if (can_do && disable_write_same) {
-		can_do = false;
-		drbd_info(device, "WRITE_SAME disabled by config\n");
-	}
-
-	if (can_do && !(common_connection_features(device->resource) & DRBD_FF_WSAME)) {
-		can_do = false;
-		drbd_info(device, "peer does not support WRITE_SAME\n");
-	}
-
-	if (o) {
-		/* logical block size; queue_logical_block_size(NULL) is 512 */
-		unsigned int peer_lbs = be32_to_cpu(o->logical_block_size);
-		unsigned int me_lbs_b = queue_logical_block_size(b);
-		unsigned int me_lbs = queue_logical_block_size(q);
-
-		if (me_lbs_b != me_lbs) {
-			drbd_warn(device,
-				"logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n",
-				me_lbs, me_lbs_b);
-			/* rather disable write same than trigger some BUG_ON later in the scsi layer. */
-			can_do = false;
-		}
-		if (me_lbs_b != peer_lbs) {
-			drbd_warn(device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n",
-				me_lbs, peer_lbs);
-			if (can_do) {
-				dynamic_drbd_dbg(device, "logical block size mismatch: WRITE_SAME disabled.\n");
-				can_do = false;
-			}
-			me_lbs = max(me_lbs, me_lbs_b);
-			/* We cannot change the logical block size of an in-use queue.
-			 * We can only hope that access happens to be properly aligned.
-			 * If not, the peer will likely produce an IO error, and detach. */
-			if (peer_lbs > me_lbs) {
-				if (device->resource->role[NOW] != R_PRIMARY) {
-					blk_queue_logical_block_size(q, peer_lbs);
-					drbd_warn(device, "logical block size set to %u\n", peer_lbs);
-				} else {
-					drbd_warn(device,
-						"current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n",
-						me_lbs, peer_lbs);
-				}
-			}
-		}
-		if (can_do && !o->write_same_capable) {
-			/* If we introduce an open-coded write-same loop on the receiving side,
-			 * the peer would present itself as "capable". */
-			dynamic_drbd_dbg(device, "WRITE_SAME disabled (peer device not capable)\n");
-			can_do = false;
-		}
-	}
-
-	blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0);
-}
-
 static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
 				   unsigned int max_bio_size, struct o_qlim *o)
 {
@@ -2067,7 +2004,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
 	struct request_queue *b = NULL;
 	struct disk_conf *dc;
 	bool discard_zeroes_if_aligned = true;
-	bool disable_write_same = false;
 
 	if (bdev) {
 		b = bdev->backing_bdev->bd_disk->queue;
@@ -2076,7 +2012,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
 		rcu_read_lock();
 		dc = rcu_dereference(device->ldev->disk_conf);
 		discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
-		disable_write_same = dc->disable_write_same;
 		rcu_read_unlock();
 
 		blk_set_stacking_limits(&q->limits);
@@ -2086,7 +2021,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
 	/* This is the workaround for "bio would need to, but cannot, be split" */
 	blk_queue_segment_boundary(q, PAGE_SIZE-1);
 	decide_on_discard_support(device, q, b, o, discard_zeroes_if_aligned);
-	decide_on_write_same_support(device, q, b, o, disable_write_same);
 
 	if (b) {
 		blk_stack_limits(&q->limits, &b->limits, 0);
@@ -2358,8 +2292,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	if (write_ordering_changed(old_disk_conf, new_disk_conf))
 		drbd_bump_write_ordering(device->resource, NULL, WO_BIO_BARRIER);
 
-	if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned
-	||  old_disk_conf->disable_write_same != new_disk_conf->disable_write_same)
+	if (old_disk_conf->discard_zeroes_if_aligned !=
+	    new_disk_conf->discard_zeroes_if_aligned)
 		drbd_reconsider_queue_parameters(device, device->ldev, NULL);
 
 	drbd_md_sync_if_dirty(device);
diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c
index 4783d58e..eba6a3c4 100644
--- a/drbd/drbd_receiver.c
+++ b/drbd/drbd_receiver.c
@@ -1746,17 +1746,6 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru
 	drbd_endio_write_sec_final(peer_req);
 }
 
-static void drbd_issue_peer_wsame(struct drbd_device *device,
-				  struct drbd_peer_request *peer_req)
-{
-	struct block_device *bdev = device->ldev->backing_bdev;
-	sector_t s = peer_req->i.sector;
-	sector_t nr = peer_req->i.size >> 9;
-	if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->page_chain.head))
-		peer_req->flags |= EE_WAS_ERROR;
-	drbd_endio_write_sec_final(peer_req);
-}
-
 static bool conn_wait_ee_cond(struct drbd_connection *connection, struct list_head *head)
 {
 	bool done;
@@ -1827,14 +1816,11 @@ int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
 	 * Correctness first, performance later.  Next step is to code an
 	 * asynchronous variant of the same.
 	 */
-	if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
+	if (peer_req->flags & (EE_TRIM|EE_ZEROOUT)) {
 		peer_req->submit_jif = jiffies;
 		peer_req->flags |= EE_SUBMITTED;
 
-		if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
-			drbd_issue_peer_discard_or_zero_out(device, peer_req);
-		else /* EE_WRITE_SAME */
-			drbd_issue_peer_wsame(device, peer_req);
+		drbd_issue_peer_discard_or_zero_out(device, peer_req);
 		return 0;
 	}
 
@@ -1847,7 +1833,7 @@ int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
 	 * generated bio, but a bio allocated on behalf of the peer.
 	 */
 next_bio:
-	/* REQ_OP_WRITE_SAME, _DISCARD, _WRITE_ZEROES handled above.
+	/* _DISCARD, _WRITE_ZEROES handled above.
 	 * REQ_OP_FLUSH (empty flush) not expected,
 	 * should have been mapped to a "drbd protocol barrier".
 	 * REQ_OP_SECURE_ERASE: I don't see how we could ever support that.
@@ -2118,7 +2104,7 @@ static void p_req_detail_from_pi(struct drbd_connection *connection,
 		struct drbd_peer_request_details *d, struct packet_info *pi)
 {
 	struct p_trim *p = pi->data;
-	bool is_trim_or_wsame = pi->cmd == P_TRIM || pi->cmd == P_WSAME || pi->cmd == P_ZEROES;
+	bool is_trim_or_zeroes = pi->cmd == P_TRIM || pi->cmd == P_ZEROES;
 	unsigned int digest_size =
 		pi->cmd != P_TRIM && connection->peer_integrity_tfm ?
 		crypto_shash_digestsize(connection->peer_integrity_tfm) : 0;
@@ -2128,7 +2114,7 @@ static void p_req_detail_from_pi(struct drbd_connection *connection,
 	d->peer_seq = be64_to_cpu(p->p_data.seq_num);
 	d->dp_flags = be32_to_cpu(p->p_data.dp_flags);
 	d->length = pi->size;
-	d->bi_size = is_trim_or_wsame ? be32_to_cpu(p->size) : pi->size - digest_size;
+	d->bi_size = is_trim_or_zeroes ? be32_to_cpu(p->size) : pi->size - digest_size;
 	d->digest_size = digest_size;
 }
 
@@ -2161,7 +2147,7 @@ read_in_block(struct drbd_peer_device *peer_device, struct drbd_peer_request_det
 
 	if (!expect(peer_device, IS_ALIGNED(d->bi_size, 512)))
 		return NULL;
-	if (d->dp_flags & (DP_WSAME|DP_DISCARD|DP_ZEROES)) {
+	if (d->dp_flags & (DP_DISCARD|DP_ZEROES)) {
 		if (!expect(peer_device, d->bi_size <= (DRBD_MAX_BBIO_SECTORS << 9)))
 			return NULL;
 	} else if (!expect(peer_device, d->bi_size <= DRBD_MAX_BIO_SIZE))
@@ -2696,10 +2682,7 @@ static unsigned long wire_flags_to_bio_op(u32 dpf)
 		return REQ_OP_WRITE_ZEROES;
 	if (dpf & DP_DISCARD)
 		return REQ_OP_DISCARD;
-	if (dpf & DP_WSAME)
-		return REQ_OP_WRITE_SAME;
-	else
-		return REQ_OP_WRITE;
+	return REQ_OP_WRITE;
 }
 
 /* see also bio_flags_to_wire() */
@@ -2908,8 +2891,6 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 		peer_req->flags |= EE_TRIM;
 	else if (pi->cmd == P_ZEROES)
 		peer_req->flags |= EE_ZEROOUT;
-	else if (pi->cmd == P_WSAME)
-		peer_req->flags |= EE_WRITE_SAME;
 
 	peer_req->dagtag_sector = atomic64_read(&connection->last_dagtag_sector) + (peer_req->i.size >> 9);
 	atomic64_set(&connection->last_dagtag_sector, peer_req->dagtag_sector);
@@ -2937,10 +2918,6 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
 		if (d.dp_flags & DP_DISCARD)
 			peer_req->flags |= EE_TRIM;
-	} else if (pi->cmd == P_WSAME) {
-		D_ASSERT(peer_device, peer_req->i.size > 0);
-		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_SAME);
-		D_ASSERT(peer_device, peer_req->page_chain.head != NULL);
 	} else if (peer_req->page_chain.head == NULL) {
 		/* Actually, this must not happen anymore,
 		 * "empty" flushes are mapped to P_BARRIER,
@@ -8127,7 +8104,6 @@ static struct data_cmd drbd_cmd_handler[] = {
 	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
 	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
 	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
-	[P_WSAME]	    = { 1, sizeof(struct p_wsame), receive_Data },
 	[P_DISCONNECT]      = { 0, 0, receive_disconnect },
 };
 
diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c
index dc0eb613..f58bfc4a 100644
--- a/drbd/drbd_req.c
+++ b/drbd/drbd_req.c
@@ -54,7 +54,6 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
 	spin_lock_init(&req->rq_lock);
 
 	req->local_rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
-	              | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
 	              | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
 	              | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
 
diff --git a/drbd/drbd_sender.c b/drbd/drbd_sender.c
index d4759d82..22a5a996 100644
--- a/drbd/drbd_sender.c
+++ b/drbd/drbd_sender.c
@@ -355,10 +355,6 @@ void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
 		src = kmap_atomic(bvec.bv_page);
 		crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len);
 		kunmap_atomic(src);
-		/* WRITE_SAME has only one segment,
-		 * checksum the payload only once. */
-		if (bio_op(bio) == REQ_OP_WRITE_SAME)
-			break;
 	}
 	crypto_shash_final(desc, digest);
 	shash_desc_zero(desc);
diff --git a/drbd/drbd_transport_tcp.c b/drbd/drbd_transport_tcp.c
index 9911d26a..29ff0bcf 100644
--- a/drbd/drbd_transport_tcp.c
+++ b/drbd/drbd_transport_tcp.c
@@ -1225,9 +1225,6 @@ static int dtt_send_zc_bio(struct drbd_transport *transport, struct bio *bio)
 				      bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
 		if (err)
 			return err;
-
-		if (bio_op(bio) == REQ_OP_WRITE_SAME)
-			break;
 	}
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Drbd-dev] [PATCH 2/3] drbd: remove reliance on bdi congestion
  2022-09-08 21:13 [Drbd-dev] [PATCH 0/3] 5.18 kernel patches w/out compat Michael D Labriola
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support Michael D Labriola
@ 2022-09-08 21:13 ` Michael D Labriola
  2022-09-09  8:18   ` Joel Colledge
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast() Michael D Labriola
  2 siblings, 1 reply; 10+ messages in thread
From: Michael D Labriola @ 2022-09-08 21:13 UTC (permalink / raw)
  To: drbd-dev

A serries of commits between 5.17 and 5.18 removed inode_congested()
and all it's related functions because "No bdi reports congestion any
more, so this always returns 'false'".

This commit just returns false in both places bdi_congested() was
called... which probably isn't the best long-term fix.

See upstream commits:

670d21c6e17f67535fcf16e14c772209220da9ae
6df25e58532be7a4cd6fb15bcd85805947402d91
503d4fa6ee28e8d4d201c92ac3922d1b3526f844
fe55d563d4174f13839a9b7ef7309da5031b5d93

Signed-off-by: Michael D Labriola <veggiemike@sourceruckus.org>
---
 drbd/drbd_nl.c  | 5 +----
 drbd/drbd_req.c | 3 +--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drbd/drbd_nl.c b/drbd/drbd_nl.c
index 79464bb7..1a126d41 100644
--- a/drbd/drbd_nl.c
+++ b/drbd/drbd_nl.c
@@ -5446,10 +5446,7 @@ static void device_to_statistics(struct device_statistics *s,
 		spin_unlock_irq(&md->uuid_lock);
 
 		s->dev_disk_flags = md->flags;
-		s->dev_lower_blocked =
-			bdi_congested(device->ldev->backing_bdev->bd_disk->bdi,
-				      (1 << WB_async_congested) |
-				      (1 << WB_sync_congested));
+		s->dev_lower_blocked = false;
 		put_ldev(device);
 	}
 	s->dev_size = get_capacity(device->vdisk);
diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c
index f58bfc4a..fc7dfb8d 100644
--- a/drbd/drbd_req.c
+++ b/drbd/drbd_req.c
@@ -1274,8 +1274,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device,
 
 	switch (rbm) {
 	case RB_CONGESTED_REMOTE:
-		return bdi_read_congested(
-			device->ldev->backing_bdev->bd_disk->bdi);
+		return false;
 	case RB_LEAST_PENDING:
 		return atomic_read(&device->local_cnt) >
 			atomic_read(&peer_device->ap_pending_cnt) + atomic_read(&peer_device->rs_pending_cnt);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast()
  2022-09-08 21:13 [Drbd-dev] [PATCH 0/3] 5.18 kernel patches w/out compat Michael D Labriola
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support Michael D Labriola
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 2/3] drbd: remove reliance on bdi congestion Michael D Labriola
@ 2022-09-08 21:13 ` Michael D Labriola
  2022-09-09  7:59   ` Joel Colledge
  2022-09-30 17:02   ` Christoph Böhmwalder
  2 siblings, 2 replies; 10+ messages in thread
From: Michael D Labriola @ 2022-09-08 21:13 UTC (permalink / raw)
  To: drbd-dev

Between 5.17 and 5.18, bio_clone_fast() was modified to take a
block_device and the function was renamed.  This commit migrates to
the new usage.

Upstream commits:
abfc426d block: pass a block_device to bio_clone_fast

Signed-off-by: Michael D Labriola <veggiemike@sourceruckus.org>
---
 drbd/drbd_req.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c
index fc7dfb8d..06ba8ce8 100644
--- a/drbd/drbd_req.c
+++ b/drbd/drbd_req.c
@@ -1715,7 +1715,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio,
 	req->start_jif = bio_start_io_acct(req->master_bio);
 
 	if (get_ldev(device)) {
-		req->private_bio  = bio_clone_fast(bio, GFP_NOIO, &drbd_io_bio_set);
+		req->private_bio  = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, &drbd_io_bio_set);
 		req->private_bio->bi_private = req;
 		req->private_bio->bi_end_io = drbd_request_endio;
 	}
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support Michael D Labriola
@ 2022-09-09  7:57   ` Joel Colledge
  2022-09-09 13:04     ` Lars Ellenberg
  0 siblings, 1 reply; 10+ messages in thread
From: Joel Colledge @ 2022-09-09  7:57 UTC (permalink / raw)
  To: Michael D Labriola; +Cc: Lars Ellenberg, drbd-dev

Hi Michael,

Thanks for the patch!

> This commit mimics upstream commit a34592ff6b78, which removes all the
> WRITE_SAME code because "REQ_OP_WRITE_SAME was only ever submitted by
> the legacy Linux zeroing code, which has switched to use
> REQ_OP_WRITE_ZEROES long ago."  WRITE_SAME was then removed from Linux
> 5.18.

This is an interesting case. If we remove all the write-same code like
this, then we either need to add it back in via compat, or we lose a
feature. I think it is OK to lose the feature on 5.18+ kernels, since
the rest of the kernel does not use it any more. On older kernels this
might break real use cases.

There is also the case where our peer is running an older kernel and
we are running 5.18+. I think we should worry about that after
deciding what to do with peers that run the same kernel.

I've added Lars to CC because he is more familiar with the historical
changes in this area.

Note: If we do remove the feature entirely, we should stop advertising
the feature flag DRBD_FF_WSAME so that the peer knows that we do not
support it.

Best regards,
Joel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast()
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast() Michael D Labriola
@ 2022-09-09  7:59   ` Joel Colledge
  2022-09-30 17:02   ` Christoph Böhmwalder
  1 sibling, 0 replies; 10+ messages in thread
From: Joel Colledge @ 2022-09-09  7:59 UTC (permalink / raw)
  To: Michael D Labriola; +Cc: drbd-dev

Hi Michael,

I believe Christoph has already started work on this one, so I'm
copying him in so that you can discuss it.

Thanks,
Joel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Drbd-dev] [PATCH 2/3] drbd: remove reliance on bdi congestion
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 2/3] drbd: remove reliance on bdi congestion Michael D Labriola
@ 2022-09-09  8:18   ` Joel Colledge
  0 siblings, 0 replies; 10+ messages in thread
From: Joel Colledge @ 2022-09-09  8:18 UTC (permalink / raw)
  To: Michael D Labriola; +Cc: drbd-dev

Hi Michael,

> A serries of commits between 5.17 and 5.18 removed inode_congested()
> and all it's related functions because "No bdi reports congestion any
> more, so this always returns 'false'".
>
> This commit just returns false in both places bdi_congested() was
> called... which probably isn't the best long-term fix.

I suspect this is the best we can do in both cases. Perhaps with a
comment explaining why we just return false. The alternative would be
to invent some kind of heuristic to guess whether the backing device
is congested. That sounds like an intractable problem.

It would be interesting to know since when "No bdi reports congestion
any more". That would tell us how long we need to keep the compat
code.

It would also be interesting to know whether any real user depends on
this at all, but we'll never know that.

Please go ahead and add the compat code.

Thanks,
Joel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support
  2022-09-09  7:57   ` Joel Colledge
@ 2022-09-09 13:04     ` Lars Ellenberg
  2022-09-12 19:08       ` Michael Labriola
  0 siblings, 1 reply; 10+ messages in thread
From: Lars Ellenberg @ 2022-09-09 13:04 UTC (permalink / raw)
  To: drbd-dev

On Fri, Sep 09, 2022 at 09:57:23AM +0200, Joel Colledge wrote:
> Hi Michael,
> 
> Thanks for the patch!
> 
> > This commit mimics upstream commit a34592ff6b78, which removes all the
> > WRITE_SAME code because "REQ_OP_WRITE_SAME was only ever submitted by
> > the legacy Linux zeroing code, which has switched to use
> > REQ_OP_WRITE_ZEROES long ago."  WRITE_SAME was then removed from Linux
> > 5.18.
> 
> This is an interesting case. If we remove all the write-same code like
> this, then we either need to add it back in via compat, or we lose a
> feature. I think it is OK to lose the feature on 5.18+ kernels, since
> the rest of the kernel does not use it any more. On older kernels this
> might break real use cases.
> 
> There is also the case where our peer is running an older kernel and
> we are running 5.18+. I think we should worry about that after
> deciding what to do with peers that run the same kernel.
> 
> I've added Lars to CC because he is more familiar with the historical
> changes in this area.
> 
> Note: If we do remove the feature entirely, we should stop advertising
> the feature flag DRBD_FF_WSAME so that the peer knows that we do not
> support it.

Problem with that is: I overloaded this "feature flag":
/* supports REQ_WRITE_SAME on the "wire" protocol.
 * Note: this flag is overloaded,
 * its presence also
 *   - indicates support for 128 MiB "batch bios",
 *     max discard size of 128 MiB
 *     instead of 4M before that.
 *   - indicates that we exchange additional settings in p_sizes
 *     drbd_send_sizes()/receive_sizes()
 */

I did that to avoid too much special casing with protocol version numbers,
we had 8.4 and 9 version ranges, and introduced the feature on both.

Takeaway: do not overload feature flags :-|

My original commit message:
    drbd: introduce WRITE_SAME support
    
    We will support WRITE_SAME, if
     * all peers support WRITE_SAME (both in kernel and DRBD version),
     * all peer devices support WRITE_SAME
     * logical_block_size is identical on all peers.
    
    We may at some point introduce a fallback on the receiving side
    for devices/kernels that do not support WRITE_SAME,
    by open-coding a submit loop. But not yet.

I don't think "open code a compat fallback" is useful,
the only "relevant" usage of WSAME was ZERO-OUT,
which got its own flag in upstream kernel (and then DRBD) in 2018.

I don't think "WSAME" compat accross 8.4 <-> 9 is something we need to worry
about, I'm fine with not supporting it or the other things it indicated when
connecting 8.4 <-> 9, which should only be done temporarily.
Most of the time a "stop everything; upgrade all nodes; start everything"
will be more effective and have shorter overall downtime than a rolling upgrade
one-by-one, moving services by stopping, then starting on an other node.

But actually we do not need to remove the DRBD_FF_WSAME flag, because WSAME
will not be used even if the feature flag is present, if we don't indicate
support for it in the p_sizes per "peer device" aka volume.
we now always report p->qlim->write_same_capable = 0.

==> I think the patch is okay as is.

If we want, we can just rename the flag,
%s/DRBD_FF_WSAME/DRBD_FF_EXTENDED_QLIM/g or something.

Should we want to drop the flag,
for 9 <-> 9, git log (below) says that we can use PRO_VERSION >= 111
to indicate "128 MiB batch bio size" and "extended p_sizes".

2021-07-30 95adb51a1 drbd: Allow reverting resync direction by usind the --discard-my-data flag        #define PRO_VERSION_MAX 121
2021-04-14 43a7362fa drbd: new option for "invalidate(-remote)" commands: "--reset-bitmap=no"          #define PRO_VERSION_MAX 120
2021-01-25 4f2682e38 drbd: Generalize when resync direction is based on disk states                    #define PRO_VERSION_MAX 119
2020-10-16 e595bfc63 drbd: Serialize connects and promotes properly                                    #define PRO_VERSION_MAX 118
2020-06-19 1f59029d7 drbd: Send config details, size, data UUIDs, and initial state before first 2pc   #define PRO_VERSION_MAX 117
2019-10-23 2f144892f drbd: Allow resync of a re-created peer from day0 UUID                            #define PRO_VERSION_MAX 116
2019-05-24 50124b2b9 drbd: introduce P_RS_CANCEL_AHEAD                                                 #define PRO_VERSION_MAX 115
2018-07-19 3d98754ed drbd: protocol 114: fix distributed deadlock on secondary activity log            #define PRO_VERSION_MAX 114
2018-04-11 6036fafcc drbd: introduce P_ZEROES (REQ_OP_WRITE_ZEROES on the "wire")
2018-03-13 d987c27a7 drbd: Automatically resolve split-brain in a specific case with quorum            #define PRO_VERSION_MAX 113
2016-08-29 6708bd087 drbd: two-phase resize                                                            #define PRO_VERSION_MAX 112
2016-01-28 399b81433 fixed resize for multiple nodes                                                   #define PRO_VERSION_MAX 111
2015-12-04 aeee107ec drbd: introduce WRITE_SAME support
2012-01-24 6a4b0c000 drbd: Implement cluster-wide state changes using two-phase commit                 #define PRO_VERSION_MAX 110


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support
  2022-09-09 13:04     ` Lars Ellenberg
@ 2022-09-12 19:08       ` Michael Labriola
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Labriola @ 2022-09-12 19:08 UTC (permalink / raw)
  To: drbd-dev, Lars Ellenberg, Joel Colledge

On Fri, Sep 9, 2022 at 9:04 AM Lars Ellenberg <lars.ellenberg@linbit.com> wrote:
>
> On Fri, Sep 09, 2022 at 09:57:23AM +0200, Joel Colledge wrote:
> > Hi Michael,
> >
> > Thanks for the patch!
> >
> > > This commit mimics upstream commit a34592ff6b78, which removes all the
> > > WRITE_SAME code because "REQ_OP_WRITE_SAME was only ever submitted by
> > > the legacy Linux zeroing code, which has switched to use
> > > REQ_OP_WRITE_ZEROES long ago."  WRITE_SAME was then removed from Linux
> > > 5.18.
> >
> > This is an interesting case. If we remove all the write-same code like
> > this, then we either need to add it back in via compat, or we lose a
> > feature. I think it is OK to lose the feature on 5.18+ kernels, since
> > the rest of the kernel does not use it any more. On older kernels this
> > might break real use cases.
> >
> > There is also the case where our peer is running an older kernel and
> > we are running 5.18+. I think we should worry about that after
> > deciding what to do with peers that run the same kernel.
> >
> > I've added Lars to CC because he is more familiar with the historical
> > changes in this area.
> >
> > Note: If we do remove the feature entirely, we should stop advertising
> > the feature flag DRBD_FF_WSAME so that the peer knows that we do not
> > support it.
>
> Problem with that is: I overloaded this "feature flag":
> /* supports REQ_WRITE_SAME on the "wire" protocol.
>  * Note: this flag is overloaded,
>  * its presence also
>  *   - indicates support for 128 MiB "batch bios",
>  *     max discard size of 128 MiB
>  *     instead of 4M before that.
>  *   - indicates that we exchange additional settings in p_sizes
>  *     drbd_send_sizes()/receive_sizes()
>  */

Yeah, I stumbled onto that while on my first cut of this patch...
quickly backpedaled and did as little as possible in case there was
any other unanticipated ripple.

>
> I did that to avoid too much special casing with protocol version numbers,
> we had 8.4 and 9 version ranges, and introduced the feature on both.
>
> Takeaway: do not overload feature flags :-|
>
> My original commit message:
>     drbd: introduce WRITE_SAME support
>
>     We will support WRITE_SAME, if
>      * all peers support WRITE_SAME (both in kernel and DRBD version),
>      * all peer devices support WRITE_SAME
>      * logical_block_size is identical on all peers.
>
>     We may at some point introduce a fallback on the receiving side
>     for devices/kernels that do not support WRITE_SAME,
>     by open-coding a submit loop. But not yet.
>
> I don't think "open code a compat fallback" is useful,
> the only "relevant" usage of WSAME was ZERO-OUT,
> which got its own flag in upstream kernel (and then DRBD) in 2018.
>
> I don't think "WSAME" compat accross 8.4 <-> 9 is something we need to worry
> about, I'm fine with not supporting it or the other things it indicated when
> connecting 8.4 <-> 9, which should only be done temporarily.
> Most of the time a "stop everything; upgrade all nodes; start everything"
> will be more effective and have shorter overall downtime than a rolling upgrade
> one-by-one, moving services by stopping, then starting on an other node.
>
> But actually we do not need to remove the DRBD_FF_WSAME flag, because WSAME
> will not be used even if the feature flag is present, if we don't indicate
> support for it in the p_sizes per "peer device" aka volume.
> we now always report p->qlim->write_same_capable = 0.
>
> ==> I think the patch is okay as is.

Ok, good to hear.  I was a lot less sure of this one than other
"keeping up with mainline" patches I've worked on in the past.

I think I've gotten a little lost regarding the need for a compat
patch, though.  Are you saying that because the only real benefit of
WSAME is now taken care of by ZERO-OUT, there really is no need for a
compat patch to put WSAME back into the drbd9 tree (for pre-5.18
kernels)?

>
> If we want, we can just rename the flag,
> %s/DRBD_FF_WSAME/DRBD_FF_EXTENDED_QLIM/g or something.
>
> Should we want to drop the flag,
> for 9 <-> 9, git log (below) says that we can use PRO_VERSION >= 111
> to indicate "128 MiB batch bio size" and "extended p_sizes".
>
> 2021-07-30 95adb51a1 drbd: Allow reverting resync direction by usind the --discard-my-data flag        #define PRO_VERSION_MAX 121
> 2021-04-14 43a7362fa drbd: new option for "invalidate(-remote)" commands: "--reset-bitmap=no"          #define PRO_VERSION_MAX 120
> 2021-01-25 4f2682e38 drbd: Generalize when resync direction is based on disk states                    #define PRO_VERSION_MAX 119
> 2020-10-16 e595bfc63 drbd: Serialize connects and promotes properly                                    #define PRO_VERSION_MAX 118
> 2020-06-19 1f59029d7 drbd: Send config details, size, data UUIDs, and initial state before first 2pc   #define PRO_VERSION_MAX 117
> 2019-10-23 2f144892f drbd: Allow resync of a re-created peer from day0 UUID                            #define PRO_VERSION_MAX 116
> 2019-05-24 50124b2b9 drbd: introduce P_RS_CANCEL_AHEAD                                                 #define PRO_VERSION_MAX 115
> 2018-07-19 3d98754ed drbd: protocol 114: fix distributed deadlock on secondary activity log            #define PRO_VERSION_MAX 114
> 2018-04-11 6036fafcc drbd: introduce P_ZEROES (REQ_OP_WRITE_ZEROES on the "wire")
> 2018-03-13 d987c27a7 drbd: Automatically resolve split-brain in a specific case with quorum            #define PRO_VERSION_MAX 113
> 2016-08-29 6708bd087 drbd: two-phase resize                                                            #define PRO_VERSION_MAX 112
> 2016-01-28 399b81433 fixed resize for multiple nodes                                                   #define PRO_VERSION_MAX 111
> 2015-12-04 aeee107ec drbd: introduce WRITE_SAME support
> 2012-01-24 6a4b0c000 drbd: Implement cluster-wide state changes using two-phase commit                 #define PRO_VERSION_MAX 110
>
> _______________________________________________
> drbd-dev mailing list
> drbd-dev@lists.linbit.com
> https://lists.linbit.com/mailman/listinfo/drbd-dev

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast()
  2022-09-08 21:13 ` [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast() Michael D Labriola
  2022-09-09  7:59   ` Joel Colledge
@ 2022-09-30 17:02   ` Christoph Böhmwalder
  1 sibling, 0 replies; 10+ messages in thread
From: Christoph Böhmwalder @ 2022-09-30 17:02 UTC (permalink / raw)
  To: Michael D Labriola; +Cc: drbd-dev

Am 08.09.22 um 23:13 schrieb Michael D Labriola:
> Between 5.17 and 5.18, bio_clone_fast() was modified to take a
> block_device and the function was renamed.  This commit migrates to
> the new usage.
> 
> Upstream commits:
> abfc426d block: pass a block_device to bio_clone_fast
> 
> Signed-off-by: Michael D Labriola <veggiemike@sourceruckus.org>
> ---
>  drbd/drbd_req.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c
> index fc7dfb8d..06ba8ce8 100644
> --- a/drbd/drbd_req.c
> +++ b/drbd/drbd_req.c
> @@ -1715,7 +1715,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio,
>  	req->start_jif = bio_start_io_acct(req->master_bio);
>  
>  	if (get_ldev(device)) {
> -		req->private_bio  = bio_clone_fast(bio, GFP_NOIO, &drbd_io_bio_set);
> +		req->private_bio  = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, &drbd_io_bio_set);
>  		req->private_bio->bi_private = req;
>  		req->private_bio->bi_end_io = drbd_request_endio;
>  	}

Thanks for the patch!

Applied as
https://github.com/LINBIT/drbd/commit/945b5e58ad1761c1f6e874e0db7aee7f42d11827.

(And fixed up in
https://github.com/LINBIT/drbd/commit/7e7111b94b8ef100a6e3b5fea084105122ad0546
;)

-- 
Christoph Böhmwalder
LINBIT | Keeping the Digital World Running
DRBD HA —  Disaster Recovery — Software defined Storage

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-09-30 17:02 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-09-08 21:13 [Drbd-dev] [PATCH 0/3] 5.18 kernel patches w/out compat Michael D Labriola
2022-09-08 21:13 ` [Drbd-dev] [PATCH 1/3] drbd: remove WRITE_SAME support Michael D Labriola
2022-09-09  7:57   ` Joel Colledge
2022-09-09 13:04     ` Lars Ellenberg
2022-09-12 19:08       ` Michael Labriola
2022-09-08 21:13 ` [Drbd-dev] [PATCH 2/3] drbd: remove reliance on bdi congestion Michael D Labriola
2022-09-09  8:18   ` Joel Colledge
2022-09-08 21:13 ` [Drbd-dev] [PATCH 3/3] drbd: use bio_alloc_clone() instead of bio_clone_fast() Michael D Labriola
2022-09-09  7:59   ` Joel Colledge
2022-09-30 17:02   ` Christoph Böhmwalder

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox