* [PATCH 04/25] sd: implement REQ_OP_WRITE_ZEROES
From: Christoph Hellwig @ 2017-03-31 16:32 UTC (permalink / raw)
To: axboe, martin.petersen, agk, snitzer, shli, philipp.reisner,
lars.ellenberg
Cc: linux-block, linux-scsi, drbd-dev, dm-devel, linux-raid
In-Reply-To: <20170331163313.31821-1-hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/scsi/sd.c | 31 ++++++++++++++++++++++++++-----
drivers/scsi/sd_zbc.c | 1 +
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b853f91fb3da..d8d9c0bdd93c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -735,7 +735,7 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
return scsi_init_io(cmd);
}
-static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd)
+static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
{
struct scsi_device *sdp = cmd->device;
struct request *rq = cmd->request;
@@ -752,13 +752,14 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd)
cmd->cmd_len = 16;
cmd->cmnd[0] = WRITE_SAME_16;
- cmd->cmnd[1] = 0x8; /* UNMAP */
+ if (unmap)
+ cmd->cmnd[1] = 0x8; /* UNMAP */
put_unaligned_be64(sector, &cmd->cmnd[2]);
put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
cmd->allowed = SD_MAX_RETRIES;
cmd->transfersize = data_len;
- rq->timeout = SD_TIMEOUT;
+ rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
scsi_req(rq)->resid_len = data_len;
return scsi_init_io(cmd);
@@ -788,12 +789,27 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
cmd->allowed = SD_MAX_RETRIES;
cmd->transfersize = data_len;
- rq->timeout = SD_TIMEOUT;
+ rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
scsi_req(rq)->resid_len = data_len;
return scsi_init_io(cmd);
}
+static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
+{
+ struct request *rq = cmd->request;
+ struct scsi_device *sdp = cmd->device;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+
+ if (sdp->no_write_same)
+ return BLKPREP_INVALID;
+ if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
+ return sd_setup_write_same16_cmnd(cmd, false);
+ return sd_setup_write_same10_cmnd(cmd, false);
+}
+
static void sd_config_write_same(struct scsi_disk *sdkp)
{
struct request_queue *q = sdkp->disk->queue;
@@ -823,6 +839,8 @@ static void sd_config_write_same(struct scsi_disk *sdkp)
out:
blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks *
(logical_block_size >> 9));
+ blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks *
+ (logical_block_size >> 9));
}
/**
@@ -1163,7 +1181,7 @@ static int sd_init_command(struct scsi_cmnd *cmd)
case SD_LBP_UNMAP:
return sd_setup_unmap_cmnd(cmd);
case SD_LBP_WS16:
- return sd_setup_write_same16_cmnd(cmd);
+ return sd_setup_write_same16_cmnd(cmd, true);
case SD_LBP_WS10:
return sd_setup_write_same10_cmnd(cmd, true);
case SD_LBP_ZERO:
@@ -1171,6 +1189,8 @@ static int sd_init_command(struct scsi_cmnd *cmd)
default:
return BLKPREP_INVALID;
}
+ case REQ_OP_WRITE_ZEROES:
+ return sd_setup_write_zeroes_cmnd(cmd);
case REQ_OP_WRITE_SAME:
return sd_setup_write_same_cmnd(cmd);
case REQ_OP_FLUSH:
@@ -1810,6 +1830,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
switch (req_op(req)) {
case REQ_OP_DISCARD:
+ case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE_SAME:
case REQ_OP_ZONE_RESET:
if (!result) {
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 92620c8ea8ad..1994f7799fce 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -329,6 +329,7 @@ void sd_zbc_complete(struct scsi_cmnd *cmd,
switch (req_op(rq)) {
case REQ_OP_WRITE:
+ case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE_SAME:
case REQ_OP_ZONE_RESET:
--
2.11.0
^ permalink raw reply related
* [PATCH 03/25] block: implement splitting of REQ_OP_WRITE_ZEROES bios
From: Christoph Hellwig @ 2017-03-31 16:32 UTC (permalink / raw)
To: axboe, martin.petersen, agk, snitzer, shli, philipp.reisner,
lars.ellenberg
Cc: linux-block, linux-scsi, drbd-dev, dm-devel, linux-raid
In-Reply-To: <20170331163313.31821-1-hch@lst.de>
Copy and past the REQ_OP_WRITE_SAME code to prepare to implementations
that limit the write zeroes size.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-merge.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2afa262425d1..3990ae406341 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -54,6 +54,20 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
return bio_split(bio, split_sectors, GFP_NOIO, bs);
}
+static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
+ struct bio *bio, struct bio_set *bs, unsigned *nsegs)
+{
+ *nsegs = 1;
+
+ if (!q->limits.max_write_zeroes_sectors)
+ return NULL;
+
+ if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
+ return NULL;
+
+ return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
+}
+
static struct bio *blk_bio_write_same_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs,
@@ -200,8 +214,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
split = blk_bio_discard_split(q, *bio, bs, &nsegs);
break;
case REQ_OP_WRITE_ZEROES:
- split = NULL;
- nsegs = (*bio)->bi_phys_segments;
+ split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
break;
case REQ_OP_WRITE_SAME:
split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
--
2.11.0
^ permalink raw reply related
* [PATCH 02/25] block: renumber REQ_OP_WRITE_ZEROES
From: Christoph Hellwig @ 2017-03-31 16:32 UTC (permalink / raw)
To: axboe, martin.petersen, agk, snitzer, shli, philipp.reisner,
lars.ellenberg
Cc: linux-block, linux-scsi, drbd-dev, dm-devel, linux-raid
In-Reply-To: <20170331163313.31821-1-hch@lst.de>
Make life easy for implementations that needs to send a data buffer
to the device (e.g. SCSI) by numbering it as a data out command.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/blk_types.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 67bcf8a5326e..4eae30bfbfca 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -168,7 +168,7 @@ enum req_opf {
/* write the same sector many times */
REQ_OP_WRITE_SAME = 7,
/* write the zero filled sector many times */
- REQ_OP_WRITE_ZEROES = 8,
+ REQ_OP_WRITE_ZEROES = 9,
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
--
2.11.0
^ permalink raw reply related
* [PATCH 01/25] ѕd: split sd_setup_discard_cmnd
From: Christoph Hellwig @ 2017-03-31 16:32 UTC (permalink / raw)
To: axboe, martin.petersen, agk, snitzer, shli, philipp.reisner,
lars.ellenberg
Cc: linux-block, linux-scsi, drbd-dev, dm-devel, linux-raid
In-Reply-To: <20170331163313.31821-1-hch@lst.de>
Split sd_setup_discard_cmnd into one function per provisioning type. While
this creates some very slight duplication of boilerplate code it keeps the
code modular for additions of new provisioning types, and for reusing the
write same functions for the upcoming scsi implementation of the Write Zeroes
operation.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/scsi/sd.c | 153 ++++++++++++++++++++++++++++++------------------------
1 file changed, 84 insertions(+), 69 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fcfeddc79331..b853f91fb3da 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -701,93 +701,97 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
-/**
- * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device
- * @sdp: scsi device to operate on
- * @rq: Request to prepare
- *
- * Will issue either UNMAP or WRITE SAME(16) depending on preference
- * indicated by target device.
- **/
-static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
+static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
{
- struct request *rq = cmd->request;
struct scsi_device *sdp = cmd->device;
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
- sector_t sector = blk_rq_pos(rq);
- unsigned int nr_sectors = blk_rq_sectors(rq);
- unsigned int len;
- int ret;
+ struct request *rq = cmd->request;
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+ unsigned int data_len = 24;
char *buf;
- struct page *page;
- sector >>= ilog2(sdp->sector_size) - 9;
- nr_sectors >>= ilog2(sdp->sector_size) - 9;
-
- page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
- if (!page)
+ rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!rq->special_vec.bv_page)
return BLKPREP_DEFER;
+ rq->special_vec.bv_offset = 0;
+ rq->special_vec.bv_len = data_len;
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
- switch (sdkp->provisioning_mode) {
- case SD_LBP_UNMAP:
- buf = page_address(page);
-
- cmd->cmd_len = 10;
- cmd->cmnd[0] = UNMAP;
- cmd->cmnd[8] = 24;
-
- put_unaligned_be16(6 + 16, &buf[0]);
- put_unaligned_be16(16, &buf[2]);
- put_unaligned_be64(sector, &buf[8]);
- put_unaligned_be32(nr_sectors, &buf[16]);
+ cmd->cmd_len = 10;
+ cmd->cmnd[0] = UNMAP;
+ cmd->cmnd[8] = 24;
- len = 24;
- break;
+ buf = page_address(rq->special_vec.bv_page);
+ put_unaligned_be16(6 + 16, &buf[0]);
+ put_unaligned_be16(16, &buf[2]);
+ put_unaligned_be64(sector, &buf[8]);
+ put_unaligned_be32(nr_sectors, &buf[16]);
- case SD_LBP_WS16:
- cmd->cmd_len = 16;
- cmd->cmnd[0] = WRITE_SAME_16;
- cmd->cmnd[1] = 0x8; /* UNMAP */
- put_unaligned_be64(sector, &cmd->cmnd[2]);
- put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
+ cmd->allowed = SD_MAX_RETRIES;
+ cmd->transfersize = data_len;
+ rq->timeout = SD_TIMEOUT;
+ scsi_req(rq)->resid_len = data_len;
- len = sdkp->device->sector_size;
- break;
+ return scsi_init_io(cmd);
+}
- case SD_LBP_WS10:
- case SD_LBP_ZERO:
- cmd->cmd_len = 10;
- cmd->cmnd[0] = WRITE_SAME;
- if (sdkp->provisioning_mode == SD_LBP_WS10)
- cmd->cmnd[1] = 0x8; /* UNMAP */
- put_unaligned_be32(sector, &cmd->cmnd[2]);
- put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd)
+{
+ struct scsi_device *sdp = cmd->device;
+ struct request *rq = cmd->request;
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 data_len = sdp->sector_size;
- len = sdkp->device->sector_size;
- break;
+ rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!rq->special_vec.bv_page)
+ return BLKPREP_DEFER;
+ rq->special_vec.bv_offset = 0;
+ rq->special_vec.bv_len = data_len;
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
- default:
- ret = BLKPREP_INVALID;
- goto out;
- }
+ cmd->cmd_len = 16;
+ cmd->cmnd[0] = WRITE_SAME_16;
+ cmd->cmnd[1] = 0x8; /* UNMAP */
+ put_unaligned_be64(sector, &cmd->cmnd[2]);
+ put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
+ cmd->allowed = SD_MAX_RETRIES;
+ cmd->transfersize = data_len;
rq->timeout = SD_TIMEOUT;
+ scsi_req(rq)->resid_len = data_len;
- cmd->transfersize = len;
- cmd->allowed = SD_MAX_RETRIES;
+ return scsi_init_io(cmd);
+}
- rq->special_vec.bv_page = page;
- rq->special_vec.bv_offset = 0;
- rq->special_vec.bv_len = len;
+static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+ struct scsi_device *sdp = cmd->device;
+ struct request *rq = cmd->request;
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 data_len = sdp->sector_size;
+ rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!rq->special_vec.bv_page)
+ return BLKPREP_DEFER;
+ rq->special_vec.bv_offset = 0;
+ rq->special_vec.bv_len = data_len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
- scsi_req(rq)->resid_len = len;
- ret = scsi_init_io(cmd);
-out:
- if (ret != BLKPREP_OK)
- __free_page(page);
- return ret;
+ cmd->cmd_len = 10;
+ cmd->cmnd[0] = WRITE_SAME;
+ if (unmap)
+ cmd->cmnd[1] = 0x8; /* UNMAP */
+ put_unaligned_be32(sector, &cmd->cmnd[2]);
+ put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+
+ cmd->allowed = SD_MAX_RETRIES;
+ cmd->transfersize = data_len;
+ rq->timeout = SD_TIMEOUT;
+ scsi_req(rq)->resid_len = data_len;
+
+ return scsi_init_io(cmd);
}
static void sd_config_write_same(struct scsi_disk *sdkp)
@@ -1155,7 +1159,18 @@ static int sd_init_command(struct scsi_cmnd *cmd)
switch (req_op(rq)) {
case REQ_OP_DISCARD:
- return sd_setup_discard_cmnd(cmd);
+ switch (scsi_disk(rq->rq_disk)->provisioning_mode) {
+ case SD_LBP_UNMAP:
+ return sd_setup_unmap_cmnd(cmd);
+ case SD_LBP_WS16:
+ return sd_setup_write_same16_cmnd(cmd);
+ case SD_LBP_WS10:
+ return sd_setup_write_same10_cmnd(cmd, true);
+ case SD_LBP_ZERO:
+ return sd_setup_write_same10_cmnd(cmd, false);
+ default:
+ return BLKPREP_INVALID;
+ }
case REQ_OP_WRITE_SAME:
return sd_setup_write_same_cmnd(cmd);
case REQ_OP_FLUSH:
--
2.11.0
^ permalink raw reply related
* always use REQ_OP_WRITE_ZEROES for zeroing offload
From: Christoph Hellwig @ 2017-03-31 16:32 UTC (permalink / raw)
To: axboe, martin.petersen, agk, snitzer, shli, philipp.reisner,
lars.ellenberg
Cc: linux-block, linux-scsi, drbd-dev, dm-devel, linux-raid
This series makes REQ_OP_WRITE_ZEROES the only zeroing offload
supported by the block layer, and switches existing implementations
of REQ_OP_DISCARD that correctly set discard_zeroes_data to it,
removes incorrect discard_zeroes_data, and also switches WRITE SAME
based zeroing in SCSI to this new method.
The series is against the block for-next tree.
A git tree is also avaiable at:
git://git.infradead.org/users/hch/block.git discard-rework
Gitweb:
http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/discard-rework
^ permalink raw reply
* Re: [PATCH 12/23] sd: handle REQ_UNMAP
From: hch-jcswGhMUV9g @ 2017-03-31 7:18 UTC (permalink / raw)
To: Martin K. Petersen
Cc: axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org,
linux-raid-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
snitzer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
philipp.reisner-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
dm-devel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org, Bart Van Assche,
lars.ellenberg-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
shli-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
hch-jcswGhMUV9g@public.gmane.org,
agk-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ@public.gmane.org
In-Reply-To: <yq17f36yypg.fsf-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
On Thu, Mar 30, 2017 at 10:19:55PM -0400, Martin K. Petersen wrote:
> > If you manually change the provisioning mode to WS10 on a device that
> > must use WRITE SAME (16) to be able to address all blocks you're already
> > screwed right now, and with this patch you can screw yourself through
> > the WRITE_ZEROES path in addition to the DISCARD path.
>
> Oh, I see. We only had the LBA sanity check in place for write same, not
> for discard.
And btw, I'd be happy to add such a check, I'd just rather keep it out
of this patch. It might be a good idea if you give it a turn after
this series given that you have all the devices that have weird
provisioning modes while I don't..
^ permalink raw reply
* Re: [PATCH 22/23] drbd: implement REQ_OP_WRITE_ZEROES
From: Christoph Hellwig @ 2017-03-31 7:17 UTC (permalink / raw)
To: Mike Snitzer
Cc: Martin K. Petersen, Christoph Hellwig, axboe, agk, shli,
philipp.reisner, linux-block, linux-scsi, drbd-dev, dm-devel,
linux-raid
In-Reply-To: <20170330231550.GA3102@redhat.com>
On Thu, Mar 30, 2017 at 07:15:50PM -0400, Mike Snitzer wrote:
> I got pretty far along with implementing the DM thinp support for
> WRITE_ZEROES in terms of thinp's DISCARD support (more of an
> implementation detail.. or so I thought).
>
> But while discussing this effort with Jeff Moyer he asked: shouldn't the
> zeroed blocks be provisioned? This is a fairly embarassing question not
> to be able to answer in the moment. So I clearly need to learn what the
> overall intent of WRITE_ZEROES actually is.
It is to ensure the that the block range it's called on returns zeroes
on future reads. Currently if REQ_UNMAP is set you may free the space
allocation, else not.
After looking at the callers I think this is the wrong way around, so
I'm going to invert the flag, so that the two callers that care that
blocks are allocated will set it instead.
^ permalink raw reply
* Re: Re-assembling array after double device failure
From: Andy Smith @ 2017-03-31 4:25 UTC (permalink / raw)
To: linux-raid
In-Reply-To: <eb7bebad-f173-b6e9-75f8-182148eccc46@youngman.org.uk>
Hi Anthony,
On Mon, Mar 27, 2017 at 04:23:19PM +0100, Anthony Youngman wrote:
> Hopefully fixing the timeout, followed by a --assemble --force, then
> a scrub, will be all that's required.
Yep, thanks. Luckily it assembled without --force, and I was then
able to replace the older drive with a new one and rebuild onto it.
It completed that without error and seems happy now.
I'm going to subject the removed drive that got kicked out first to
some more in-depth scrutiny in another machine as soon as I get
chance.
Cheers,
Andy
^ permalink raw reply
* Re: [PATCH 22/23] drbd: implement REQ_OP_WRITE_ZEROES
From: Martin K. Petersen @ 2017-03-31 2:34 UTC (permalink / raw)
To: Mike Snitzer
Cc: axboe-tSWWG44O7X1aa/9Udqfwiw, linux-raid-u79uwXL29TY76Z2rM5mHXA,
linux-scsi-u79uwXL29TY76Z2rM5mHXA, Martin K. Petersen,
philipp.reisner-63ez5xqkn6DQT0dZR+AlfA,
linux-block-u79uwXL29TY76Z2rM5mHXA,
dm-devel-H+wXaHxf7aLQT0dZR+AlfA, shli-DgEjT+Ai2ygdnm+yROfE0A,
Christoph Hellwig, agk-H+wXaHxf7aLQT0dZR+AlfA,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ
In-Reply-To: <20170330231550.GA3102-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Mike Snitzer <snitzer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> writes:
Mike,
> But while discussing this effort with Jeff Moyer he asked: shouldn't the
> zeroed blocks be provisioned? This is a fairly embarassing question not
> to be able to answer in the moment. So I clearly need to learn what the
> overall intent of WRITE_ZEROES actually is.
The intent is to guarantee all future reads to these blocks will return
zeroes. Whether to allocate or deallocate or do a combination to achieve
that goal is up to the device.
> If it is meant as a replacement for WRITE_SAME (as hch switched dm-io
> over from WRITE_SAME with a payload of 0 to WRITE_ZEROES) and for the
> backing mechanism for blkdev_issue_zeroout() then I guess I have my
> answer.
Yes. I was hoping MD would use WRITE SAME to initialize data and parity
drives. That's why I went with SAME nomenclature rather than ZEROES
(which had just appeared in NVMe at that time).
Christoph's renaming is mostly to emphasize the purpose and the
semantics. People keep getting confused because both REQ_DISCARD and
REQ_WRITE_SAME use the WRITE SAME SCSI command. But the two uses are
completely orthogonal and governed by different heuristics in sd.c.
> Unless DM thinp can guarantee that the discarded blocks will
> always return zeroes (by zeroing before all partial block writes) my
> discard based dm-thinp implementation of WRITE_ZEROES is a complete
> throw-away (unless block zeroing is enabled.. which it never is because
> performance sucks with it). So if an upper-level of the IO stack
> (e.g. ext4) were to assume that a block will _definitely_ have zeroes
> then DM thinp would fall short.
You don't have a way to mark those blocks as being full of zeroes
without actually writing them?
Note that the fallback to a zeroout command is to do a regular write. So
if DM doesn't zero the blocks, the block layer is going to it.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
* Re: [PATCH 12/23] sd: handle REQ_UNMAP
From: Martin K. Petersen @ 2017-03-31 2:19 UTC (permalink / raw)
To: hch@lst.de
Cc: axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org,
linux-raid-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Martin K. Petersen,
snitzer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
philipp.reisner-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
dm-devel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Bart Van Assche,
lars.ellenberg-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
shli-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
agk-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ@public.gmane.org
In-Reply-To: <20170330173020.GB24229-jcswGhMUV9g@public.gmane.org>
"hch-jcswGhMUV9g@public.gmane.org" <hch-jcswGhMUV9g@public.gmane.org> writes:
> If you manually change the provisioning mode to WS10 on a device that
> must use WRITE SAME (16) to be able to address all blocks you're already
> screwed right now, and with this patch you can screw yourself through
> the WRITE_ZEROES path in addition to the DISCARD path.
Oh, I see. We only had the LBA sanity check in place for write same, not
for discard.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
* Re: [PATCH 22/23] drbd: implement REQ_OP_WRITE_ZEROES
From: Mike Snitzer @ 2017-03-30 23:15 UTC (permalink / raw)
To: Martin K. Petersen
Cc: Christoph Hellwig, axboe, agk, shli, philipp.reisner, linux-block,
linux-scsi, drbd-dev, dm-devel, linux-raid
In-Reply-To: <yq1pogy3i6u.fsf@oracle.com>
On Thu, Mar 30 2017 at 11:20am -0400,
Martin K. Petersen <martin.petersen@oracle.com> wrote:
> Mike Snitzer <snitzer@redhat.com> writes:
>
> > I can work on this now. Only question I have is: should DM thinp take
> > care to zero any misaligned head and tail? (I assume so but with all
> > the back and forth between Bart, Paolo and Martin I figured I'd ask
> > explicitly).
>
> Yep, let's make sure our semantics match the hardware ditto.
>
> - So write zeroes should behave deterministically and explicitly handle
> any blocks that can't be cleared via deprovisioning.
>
> - And discard can work at the discard granularity in a
> non-deterministic fashion.
I got pretty far along with implementing the DM thinp support for
WRITE_ZEROES in terms of thinp's DISCARD support (more of an
implementation detail.. or so I thought).
But while discussing this effort with Jeff Moyer he asked: shouldn't the
zeroed blocks be provisioned? This is a fairly embarassing question not
to be able to answer in the moment. So I clearly need to learn what the
overall intent of WRITE_ZEROES actually is.
If it is meant as a replacement for WRITE_SAME (as hch switched dm-io
over from WRITE_SAME with a payload of 0 to WRITE_ZEROES) and for the
backing mechanism for blkdev_issue_zeroout() then I guess I have my
answer. Unless DM thinp can guarantee that the discarded blocks will
always return zeroes (by zeroing before all partial block writes) my
discard based dm-thinp implementation of WRITE_ZEROES is a complete
throw-away (unless block zeroing is enabled.. which it never is because
performance sucks with it). So if an upper-level of the IO stack
(e.g. ext4) were to assume that a block will _definitely_ have zeroes
then DM thinp would fall short.
This is all to say: I don't see a quick way forward on implementing
performant WRITE_ZEROES support for DM thinp.
^ permalink raw reply
* Re: [PATCH][RFC] dm raid: Fix NULL pointer dereference for raid1 without bitmap
From: Heinz Mauelshagen @ 2017-03-30 19:30 UTC (permalink / raw)
To: kmeaw, linux-kernel, dm-devel, linux-raid
Cc: Андрей Сметанин,
shli, agk
In-Reply-To: <101041490886866@webcorp01f.yandex-team.ru>
Acked-by: Heinz Mauelshagen <heinzm@redhat.com>
This is the simplest way to solve the issue at hand
(bitmap_load() returns success with non-existing bitmap).
Heinz
On 03/30/2017 05:14 PM, kmeaw@yandex-team.ru wrote:
> From: Dmitry Bilunov <kmeaw@yandex-team.ru>
>
> 4257e08 introduces a bitmap resize call during preresume phase. User
> can create a DM device with "raid" target configured as raid1 with no
> metadata devices to hold superblock/bitmap info. It can be achieved
> using the following sequence:
>
> truncate -s 32M /dev/shm/raid-test
> LOOP=$(losetup --show -f /dev/shm/raid-test)
> dmsetup create raid-test-linear0 --table "0 1024 linear $LOOP 0"
> dmsetup create raid-test-linear1 --table "0 1024 linear $LOOP 1024"
> dmsetup create raid-test --table "0 1024 raid raid1 1 2048 2 - /dev/mapper/raid-test-linear0 - /dev/mapper/raid-test-linear1"
>
> It results in
>
> Killed
> [ 4029.110216] device-mapper: raid: Ignoring chunk size parameter for RAID 1
> [ 4029.110217] device-mapper: raid: Choosing default region size of 4MiB
> [ 4029.111349] md/raid1:mdX: active with 2 out of 2 mirrors
> [ 4029.114770] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
> [ 4029.114802] IP: bitmap_resize+0x25/0x7c0 [md_mod]
> [ 4029.114816] PGD 0
> …
> [ 4029.115059] Hardware name: Aquarius Pro P30 S85 BUY-866/B85M-E, BIOS 2304 05/25/2015
> [ 4029.115079] task: ffff88015cc29a80 task.stack: ffffc90001a5c000
> [ 4029.115097] RIP: 0010:bitmap_resize+0x25/0x7c0 [md_mod]
> [ 4029.115112] RSP: 0018:ffffc90001a5fb68 EFLAGS: 00010246
> [ 4029.115127] RAX: 0000000000000005 RBX: 0000000000000000 RCX: 0000000000000000
> [ 4029.115146] RDX: 0000000000000000 RSI: 0000000000000400 RDI: 0000000000000000
> [ 4029.115166] RBP: ffffc90001a5fc28 R08: 0000000800000000 R09: 00000008ffffffff
> [ 4029.115185] R10: ffffea0005661600 R11: ffff88015cc29a80 R12: ffff88021231f058
> [ 4029.115204] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> [ 4029.115223] FS: 00007fe73a6b4740(0000) GS:ffff88021ea80000(0000) knlGS:0000000000000000
> [ 4029.115245] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 4029.115261] CR2: 0000000000000030 CR3: 0000000159a74000 CR4: 00000000001426e0
> [ 4029.115281] Call Trace:
> [ 4029.115291] ? raid_iterate_devices+0x63/0x80 [dm_raid]
> [ 4029.115309] ? dm_table_all_devices_attribute.isra.23+0x41/0x70 [dm_mod]
> [ 4029.115329] ? dm_table_set_restrictions+0x225/0x2d0 [dm_mod]
> [ 4029.115346] raid_preresume+0x81/0x2e0 [dm_raid]
> [ 4029.115361] dm_table_resume_targets+0x47/0xe0 [dm_mod]
> [ 4029.115378] dm_resume+0xa8/0xd0 [dm_mod]
> [ 4029.115391] dev_suspend+0x123/0x250 [dm_mod]
> [ 4029.115405] ? table_load+0x350/0x350 [dm_mod]
> [ 4029.115419] ctl_ioctl+0x1c2/0x490 [dm_mod]
> [ 4029.115433] dm_ctl_ioctl+0xe/0x20 [dm_mod]
> [ 4029.115447] do_vfs_ioctl+0x8d/0x5a0
> [ 4029.115459] ? ____fput+0x9/0x10
> [ 4029.115470] ? task_work_run+0x79/0xa0
> [ 4029.115481] SyS_ioctl+0x3c/0x70
> [ 4029.115493] entry_SYSCALL_64_fastpath+0x13/0x94
>
> Function raid_preresume takes an assumption that raid_set has a bitmap enabled.
> Call to bitmap_load(&rs->md) inside __load_dirty_region_bitmap would return 0 even
> if there is no bitmap present.
>
> This patch avoids resizing an absent bitmap.
>
> Signed-off-by: Dmitry Bilunov <kmeaw@yandex-team.ru>
> Signed-off-by: Andrey Smetanin <asmetanin@yandex-team.ru>
> ---
> drivers/md/dm-raid.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
> index f8564d63..67a5405e 100644
> --- a/drivers/md/dm-raid.c
> +++ b/drivers/md/dm-raid.c
> @@ -3727,6 +3727,7 @@ static int raid_preresume(struct dm_target *ti)
>
> /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
> if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) &&
> + mddev->bitmap &&
> mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
> r = bitmap_resize(mddev->bitmap, mddev->dev_sectors,
> to_bytes(rs->requested_bitmap_chunk_sectors), 0);
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply
* Re: [PATCH] mdadm/util:integrate fstat and stat into a utility function
From: Jes Sorensen @ 2017-03-30 18:46 UTC (permalink / raw)
To: Zhilong Liu; +Cc: linux-raid
In-Reply-To: <20170330044622.2397-1-zlliu@suse.com>
On 03/30/2017 12:46 AM, Zhilong Liu wrote:
> mdadm/util: lots of dupilicate codes about stat and fstat
> check the block device, move them into a utility function
> to make it concise.
>
> Signed-off-by: Zhilong Liu <zlliu@suse.com>
> ---
> Assemble.c | 9 +--------
> Build.c | 10 +---------
> Create.c | 5 +----
> Dump.c | 3 +--
> Grow.c | 4 +---
> Incremental.c | 27 +++------------------------
> Manage.c | 9 +--------
> mdadm.h | 1 +
> super-intel.c | 9 +--------
> util.c | 15 +++++++++++++++
> 10 files changed, 26 insertions(+), 66 deletions(-)
I'm a little mixed on this one. Defaulting to stat() rather than fstat()
seems like a sledgehammer approach for the cases where we already have
the fd open. I would prefer to default to fstat or have two versions I'd
say.
Cheers,
Jes
> diff --git a/Assemble.c b/Assemble.c
> index 6a6a56b..d3bfaa2 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -204,14 +204,7 @@ static int select_devices(struct mddev_dev *devlist,
> pr_err("cannot open device %s: %s\n",
> devname, strerror(errno));
> tmpdev->used = 2;
> - } else if (fstat(dfd, &stb)< 0) {
> - /* Impossible! */
> - pr_err("fstat failed for %s: %s\n",
> - devname, strerror(errno));
> - tmpdev->used = 2;
> - } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
> - pr_err("%s is not a block device.\n",
> - devname);
> + } else if (check_block_dev(devname)) {
> tmpdev->used = 2;
> } else if (must_be_container(dfd)) {
> if (st) {
> diff --git a/Build.c b/Build.c
> index a5fcc06..4fa606c 100644
> --- a/Build.c
> +++ b/Build.c
> @@ -67,16 +67,8 @@ int Build(char *mddev, struct mddev_dev *devlist,
> missing_disks++;
> continue;
> }
> - if (stat(dv->devname, &stb)) {
> - pr_err("Cannot find %s: %s\n",
> - dv->devname, strerror(errno));
> - return 1;
> - }
> - if ((stb.st_mode & S_IFMT) != S_IFBLK) {
> - pr_err("%s is not a block device.\n",
> - dv->devname);
> + if (check_block_dev(dv->devname))
> return 1;
> - }
> }
>
> if (s->raiddisks != subdevs) {
> diff --git a/Create.c b/Create.c
> index 10e7d10..dbd9ce3 100644
> --- a/Create.c
> +++ b/Create.c
> @@ -327,11 +327,8 @@ int Create(struct supertype *st, char *mddev,
> dname, strerror(errno));
> exit(2);
> }
> - if (fstat(dfd, &stb) != 0 ||
> - (stb.st_mode & S_IFMT) != S_IFBLK) {
> + if (check_block_dev(dname)) {
> close(dfd);
> - pr_err("%s is not a block device\n",
> - dname);
> exit(2);
> }
> close(dfd);
> diff --git a/Dump.c b/Dump.c
> index 7bdbf6f..29fc02a 100644
> --- a/Dump.c
> +++ b/Dump.c
> @@ -131,8 +131,7 @@ int Dump_metadata(char *dev, char *dir, struct context *c,
> if (de->d_name[0] == '.')
> continue;
> xasprintf(&p, "/dev/disk/by-id/%s", de->d_name);
> - if (stat(p, &stb) != 0 ||
> - (stb.st_mode & S_IFMT) != S_IFBLK ||
> + if (check_block_dev(p) != 0 ||
> stb.st_rdev != dstb.st_rdev) {
> /* Not this one */
> free(p);
> diff --git a/Grow.c b/Grow.c
> index b86b53e..063996d 100755
> --- a/Grow.c
> +++ b/Grow.c
> @@ -145,9 +145,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
> free(st);
> return 1;
> }
> - fstat(nfd, &stb);
> - if ((stb.st_mode & S_IFMT) != S_IFBLK) {
> - pr_err("%s is not a block device!\n", newdev);
> + if (check_block_dev(newdev)) {
> close(nfd);
> free(st);
> return 1;
> diff --git a/Incremental.c b/Incremental.c
> index 81afc7e..b2fad39 100644
> --- a/Incremental.c
> +++ b/Incremental.c
> @@ -108,18 +108,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
>
> struct createinfo *ci = conf_get_create_info();
>
> - if (stat(devname, &stb) < 0) {
> - if (c->verbose >= 0)
> - pr_err("stat failed for %s: %s.\n",
> - devname, strerror(errno));
> + if (check_block_dev(devname))
> return rv;
> - }
> - if ((stb.st_mode & S_IFMT) != S_IFBLK) {
> - if (c->verbose >= 0)
> - pr_err("%s is not a block device.\n",
> - devname);
> - return rv;
> - }
> dfd = dev_open(devname, O_RDONLY);
> if (dfd < 0) {
> if (c->verbose >= 0)
> @@ -159,8 +149,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
> devlist = conf_get_devs();
> for (;devlist; devlist = devlist->next) {
> struct stat st2;
> - if (stat(devlist->devname, &st2) == 0 &&
> - (st2.st_mode & S_IFMT) == S_IFBLK &&
> + if (check_block_dev(devlist->devname) == 0 &&
> st2.st_rdev == stb.st_rdev)
> break;
> }
> @@ -175,18 +164,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
> /* 2/ Find metadata, reject if none appropriate (check
> * version/name from args) */
>
> - if (fstat(dfd, &stb) < 0) {
> - if (c->verbose >= 0)
> - pr_err("fstat failed for %s: %s.\n",
> - devname, strerror(errno));
> + if (check_block_dev(devname))
> goto out;
> - }
> - if ((stb.st_mode & S_IFMT) != S_IFBLK) {
> - if (c->verbose >= 0)
> - pr_err("%s is not a block device.\n",
> - devname);
> - goto out;
> - }
>
> dinfo.disk.major = major(stb.st_rdev);
> dinfo.disk.minor = minor(stb.st_rdev);
> diff --git a/Manage.c b/Manage.c
> index 55218d9..8687084 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -1543,17 +1543,10 @@ int Manage_subdevs(char *devname, int fd,
> close(tfd);
> } else {
> int open_err = errno;
> - if (stat(dv->devname, &stb) != 0) {
> - pr_err("Cannot find %s: %s\n",
> - dv->devname, strerror(errno));
> - goto abort;
> - }
> - if ((stb.st_mode & S_IFMT) != S_IFBLK) {
> + if (check_block_dev(dv->devname) != 0) {
> if (dv->disposition == 'M')
> /* non-fatal. Also improbable */
> continue;
> - pr_err("%s is not a block device.\n",
> - dv->devname);
> goto abort;
> }
> if (dv->disposition == 'r')
> diff --git a/mdadm.h b/mdadm.h
> index dbf1f92..ad43a35 100644
> --- a/mdadm.h
> +++ b/mdadm.h
> @@ -1416,6 +1416,7 @@ extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
> extern int check_ext2(int fd, char *name);
> extern int check_reiser(int fd, char *name);
> extern int check_raid(int fd, char *name);
> +extern int check_block_dev(char *dev);
> extern int check_partitions(int fd, char *dname,
> unsigned long long freesize,
> unsigned long long size);
> diff --git a/super-intel.c b/super-intel.c
> index 785488a..21b5887 100644
> --- a/super-intel.c
> +++ b/super-intel.c
> @@ -6584,14 +6584,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
> dprintf("cannot open device %s: %s\n",
> devname, strerror(errno));
> tmpdev->used = 2;
> - } else if (fstat(dfd, &stb)< 0) {
> - /* Impossible! */
> - dprintf("fstat failed for %s: %s\n",
> - devname, strerror(errno));
> - tmpdev->used = 2;
> - } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
> - dprintf("%s is not a block device.\n",
> - devname);
> + } else if (check_block_dev(devname)) {
> tmpdev->used = 2;
> } else if (must_be_container(dfd)) {
> struct supertype *cst;
> diff --git a/util.c b/util.c
> index 683c869..e7aeb54 100644
> --- a/util.c
> +++ b/util.c
> @@ -750,6 +750,21 @@ int ask(char *mesg)
> }
> #endif /* MDASSEMBLE */
>
> +int check_block_dev(char *dev)
> +{
> + struct stat stb;
> +
> + if (stat(dev, &stb) != 0) {
> + pr_err("stat failed for %s: %s\n", dev, strerror(errno));
> + return -1;
> + }
> + if ((S_IFMT & stb.st_mode) != S_IFBLK) {
> + pr_err("%s is not a block device.\n", dev);
> + return -1;
> + }
> + return 0;
> +}
> +
> int is_standard(char *dev, int *nump)
> {
> /* tests if dev is a "standard" md dev name.
>
^ permalink raw reply
* Re: [PATCHv2] mdadm.c: fix compile error "switch condition has boolean value"
From: Jes Sorensen @ 2017-03-30 17:51 UTC (permalink / raw)
To: Gioh Kim, neilb; +Cc: linux-raid, linux-kernel
In-Reply-To: <1490893093-4666-1-git-send-email-gi-oh.kim@profitbricks.com>
On 03/30/2017 12:58 PM, Gioh Kim wrote:
> Remove a boolean expression in switch condition
> to prevent compile error of some compilers,
> for example, gcc version 5.2.1 20151010 (Ubuntu 5.2.1-22ubuntu2).
>
> Signed-off-by: Gioh Kim <gi-oh.kim@profitbricks.com>
> ---
> mdadm.c | 14 ++++++--------
> 1 file changed, 6 insertions(+), 8 deletions(-)
Applied!
Thanks,
Jes
> diff --git a/mdadm.c b/mdadm.c
> index 0f32773..d6b5437 100644
> --- a/mdadm.c
> +++ b/mdadm.c
> @@ -1965,14 +1965,12 @@ static int misc_list(struct mddev_dev *devlist,
> rv |= SetAction(dv->devname, c->action);
> continue;
> }
> - switch(dv->devname[0] == '/') {
> - case 0:
> - mdfd = open_dev(dv->devname);
> - if (mdfd >= 0)
> - break;
> - case 1:
> - mdfd = open_mddev(dv->devname, 1);
> - }
> +
> + if (dv->devname[0] != '/')
> + mdfd = open_dev(dv->devname);
> + if (dv->devname[0] == '/' || mdfd < 0)
> + mdfd = open_mddev(dv->devname, 1);
> +
> if (mdfd >= 0) {
> switch(dv->disposition) {
> case 'R':
>
^ permalink raw reply
* Re: [PATCHv2] mdadm.c: fix compile error "switch condition has boolean value"
From: Wols Lists @ 2017-03-30 17:50 UTC (permalink / raw)
To: Gioh Kim, jes.sorensen, neilb; +Cc: linux-raid, linux-kernel
In-Reply-To: <1490893093-4666-1-git-send-email-gi-oh.kim@profitbricks.com>
You can add my acked-by (never done it before, not sure how :-)
Cheers,
Wol
On 30/03/17 17:58, Gioh Kim wrote:
> Remove a boolean expression in switch condition
> to prevent compile error of some compilers,
> for example, gcc version 5.2.1 20151010 (Ubuntu 5.2.1-22ubuntu2).
>
> Signed-off-by: Gioh Kim <gi-oh.kim@profitbricks.com>
> ---
> mdadm.c | 14 ++++++--------
> 1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/mdadm.c b/mdadm.c
> index 0f32773..d6b5437 100644
> --- a/mdadm.c
> +++ b/mdadm.c
> @@ -1965,14 +1965,12 @@ static int misc_list(struct mddev_dev *devlist,
> rv |= SetAction(dv->devname, c->action);
> continue;
> }
> - switch(dv->devname[0] == '/') {
> - case 0:
> - mdfd = open_dev(dv->devname);
> - if (mdfd >= 0)
> - break;
> - case 1:
> - mdfd = open_mddev(dv->devname, 1);
> - }
> +
> + if (dv->devname[0] != '/')
> + mdfd = open_dev(dv->devname);
> + if (dv->devname[0] == '/' || mdfd < 0)
> + mdfd = open_mddev(dv->devname, 1);
> +
> if (mdfd >= 0) {
> switch(dv->disposition) {
> case 'R':
>
^ permalink raw reply
* Re: [PATCH 12/23] sd: handle REQ_UNMAP
From: hch @ 2017-03-30 17:30 UTC (permalink / raw)
To: Martin K. Petersen
Cc: hch@lst.de, Bart Van Assche, agk@redhat.com,
lars.ellenberg@linbit.com, snitzer@redhat.com,
philipp.reisner@linbit.com, axboe@kernel.dk, shli@kernel.org,
linux-scsi@vger.kernel.org, dm-devel@redhat.com,
drbd-dev@lists.linbit.com, linux-block@vger.kernel.org,
linux-raid@vger.kernel.org
In-Reply-To: <yq1h92a3hsv.fsf@oracle.com>
On Thu, Mar 30, 2017 at 11:28:32AM -0400, Martin K. Petersen wrote:
> "hch@lst.de" <hch@lst.de> writes:
>
> Christoph,
>
> > On Tue, Mar 28, 2017 at 04:48:55PM +0000, Bart Van Assche wrote:
> >> > if (sdp->no_write_same)
> >> > return BLKPREP_INVALID;
> >> > if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
> >>
> >> Users can change the provisioning mode from user space from SD_LBP_WS16 into
> >> SD_LBP_WS10 so I'm not sure it's safe to skip the (sdkp->ws16 || sector >
> >> 0xffffffff || nr_sectors > 0xffff) check if REQ_UNMAP is set.
> >
> > They can, and if the device has too many sectors that will already cause
> > discard to fail,
>
> I'm not sure I understand what you mean by that?
If you manually change the provisioning mode to WS10 on a device that
must use WRITE SAME (16) to be able to address all blocks you're already
screwed right now, and with this patch you can screw yourself through
the WRITE_ZEROES path in addition to the DISCARD path.
^ permalink raw reply
* Re: [PATCH 23/23] block: remove the discard_zeroes_data flag
From: hch @ 2017-03-30 17:29 UTC (permalink / raw)
To: Martin K. Petersen
Cc: hch@lst.de, Bart Van Assche, agk@redhat.com,
lars.ellenberg@linbit.com, snitzer@redhat.com,
philipp.reisner@linbit.com, axboe@kernel.dk, shli@kernel.org,
linux-scsi@vger.kernel.org, dm-devel@redhat.com,
drbd-dev@lists.linbit.com, linux-block@vger.kernel.org,
linux-raid@vger.kernel.org
In-Reply-To: <yq1d1cy3hra.fsf@oracle.com>
On Thu, Mar 30, 2017 at 11:29:29AM -0400, Martin K. Petersen wrote:
> "hch@lst.de" <hch@lst.de> writes:
>
> > Jens, any opinion? I'd like to remove it too, but I fear it might
> > break things. We could deprecate it first with a warning when read
> > and then remove it a few releases down the road.
>
> I know of several apps that check this variable (as opposed to the
> ioctl).
The above was in reference to both methods..
^ permalink raw reply
* [PATCHv2] mdadm.c: fix compile error "switch condition has boolean value"
From: Gioh Kim @ 2017-03-30 16:58 UTC (permalink / raw)
To: jes.sorensen, neilb; +Cc: linux-raid, linux-kernel, Gioh Kim
Remove a boolean expression in switch condition
to prevent compile error of some compilers,
for example, gcc version 5.2.1 20151010 (Ubuntu 5.2.1-22ubuntu2).
Signed-off-by: Gioh Kim <gi-oh.kim@profitbricks.com>
---
mdadm.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/mdadm.c b/mdadm.c
index 0f32773..d6b5437 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1965,14 +1965,12 @@ static int misc_list(struct mddev_dev *devlist,
rv |= SetAction(dv->devname, c->action);
continue;
}
- switch(dv->devname[0] == '/') {
- case 0:
- mdfd = open_dev(dv->devname);
- if (mdfd >= 0)
- break;
- case 1:
- mdfd = open_mddev(dv->devname, 1);
- }
+
+ if (dv->devname[0] != '/')
+ mdfd = open_dev(dv->devname);
+ if (dv->devname[0] == '/' || mdfd < 0)
+ mdfd = open_mddev(dv->devname, 1);
+
if (mdfd >= 0) {
switch(dv->disposition) {
case 'R':
--
2.5.0
^ permalink raw reply related
* Re: [PATCH] imsm: use rounded size for metadata initialization
From: Jes Sorensen @ 2017-03-30 15:55 UTC (permalink / raw)
To: Tomasz Majchrzak, linux-raid
In-Reply-To: <1490883941-31953-1-git-send-email-tomasz.majchrzak@intel.com>
On 03/30/2017 10:25 AM, Tomasz Majchrzak wrote:
> Array size is rounded to the nearest MB, however number of data stripes
> and blocks per disk are calculated using size passed by the user. If
> given size is not aligned, there is a mismatch. It's not possible to
> assemble raid0 migrated to raid5 since raid5 arrays use number of data
> stripes to calculate array size.
>
> Signed-off-by: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
> ---
> super-intel.c | 52 ++++++++++++++++++++++++++++++++++------------------
> 1 file changed, 34 insertions(+), 18 deletions(-)
Applied!
Thanks,
Jes
^ permalink raw reply
* Re: [PATCH 2/2] mdadm.c: fix compile error "switch condition has boolean value"
From: Jes Sorensen @ 2017-03-30 15:53 UTC (permalink / raw)
To: Gioh Kim, NeilBrown; +Cc: linux-raid, linux-kernel, Wol
In-Reply-To: <20170330075209.GF17378@ws00837>
On 03/30/2017 03:52 AM, Gioh Kim wrote:
> On Thu, Mar 30, 2017 at 08:38:35AM +1100, NeilBrown wrote:
>> On Thu, Mar 30 2017, jes.sorensen@gmail.com wrote:
>>
>>> Gioh Kim <gi-oh.kim@profitbricks.com> writes:
>>>> Remove a boolean expression in switch condition
>>>> to prevent compile error of some compilers.
>>>
>>> Please be specific, which compile is unable to handle this?
>>>
>>>> Signed-off-by: Gioh Kim <gi-oh.kim@profitbricks.com>
>>>> ---
>>>> mdadm.c | 6 +++---
>>>> 1 file changed, 3 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/mdadm.c b/mdadm.c
>>>> index 08ddcab..a98a051 100644
>>>> --- a/mdadm.c
>>>> +++ b/mdadm.c
>>>> @@ -1905,11 +1905,11 @@ static int misc_list(struct mddev_dev *devlist,
>>>> rv |= SetAction(dv->devname, c->action);
>>>> continue;
>>>> }
>>>> - switch(dv->devname[0] == '/') {
>>>> - case 0:
>>>> + switch(dv->devname[0]) {
>>>> + default:
>>>> mdfd = open_dev(dv->devname);
>>>> if (mdfd >= 0) break;
>>>> - case 1:
>>>> + case '/':
>>>> mdfd = open_mddev(dv->devname, 1);
>>>> }
>>>> if (mdfd>=0) {
>>>
>>> While I agree the original code is ugly, I am not convinced your
>>> replacement is a lot prettier.
>>>
>>
>> Maybe
>>
>> if (dv->devname[0] == '/' ||
>> (mdfd = open_dev(dv->devname)) < 0)
>> mdfd = open_mddev(dv->devname, 1);
>>
>> ??
>> NeilBrown
>
> Yes, the initial version I thought was:
>
> if (dev->devname[0] != '/')
> mdfd = open_dev(dv->devname);
> if (dev->devname[0] == '/' || mdfd < 0)
> mdfd = open_mddev(dv->devname, 1);
>
> But I thought you have a reason to use switch-case expression,
> so I kept switch-case.
> If you agree, I'll send v2 patch using if-expression.
>
I like this solution better, I much favor code that is more explicit in
what it does. Request less brain capacity for me to read :)
Cheers,
Jes
^ permalink raw reply
* Re: [PATCH v1] mdadm/grow: reshape would be stuck from raid1 to raid5
From: Jes Sorensen @ 2017-03-30 15:50 UTC (permalink / raw)
To: Zhilong Liu; +Cc: linux-raid
In-Reply-To: <20170330073808.6176-1-zlliu@suse.com>
On 03/30/2017 03:38 AM, Zhilong Liu wrote:
> systemctl doesn't interpret mdadm-grow-continue@.service
> correctly due to the wrong argument provided in [service],
> it should be corrected %I as %i. Otherwise, if the service
> cannot start by systemctl and the reshap progress would be
> stuck all time when grows array from raid1 to raid5.
>
> reproduce steps:
> ./mdadm -CR /dev/md0 -l1 -b internal -n2 /dev/loop[0-1]
> ./mdadm --grow /dev/md0 -l5 -n3 -a /dev/loop2
>
> Signed-off-by: Zhilong Liu <zlliu@suse.com>
> ---
> systemd/mdadm-grow-continue@.service | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
This solution looks more correct to me :)
Applied!
Thanks,
Jes
^ permalink raw reply
* Re: RFC: always use REQ_OP_WRITE_ZEROES for zeroing offload
From: Mike Snitzer @ 2017-03-30 15:38 UTC (permalink / raw)
To: Martin K. Petersen
Cc: axboe-tSWWG44O7X1aa/9Udqfwiw, linux-raid-u79uwXL29TY76Z2rM5mHXA,
linux-scsi-u79uwXL29TY76Z2rM5mHXA,
philipp.reisner-63ez5xqkn6DQT0dZR+AlfA,
linux-block-u79uwXL29TY76Z2rM5mHXA,
dm-devel-H+wXaHxf7aLQT0dZR+AlfA,
lars.ellenberg-63ez5xqkn6DQT0dZR+AlfA,
shli-DgEjT+Ai2ygdnm+yROfE0A, Christoph Hellwig,
agk-H+wXaHxf7aLQT0dZR+AlfA, drbd-dev-cunTk1MwBs8qoQakbn7OcQ
In-Reply-To: <yq1lgrm3i36.fsf-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
On Thu, Mar 30 2017 at 11:22am -0400,
Martin K. Petersen <martin.petersen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> wrote:
> Mike Snitzer <snitzer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> writes:
>
> > Would be very useful, particularly for testing, if
> > drivers/scsi/scsi_debug.c were updated to support WRITE ZEROES.
>
> There is no WRITE ZEROES in SCSI. You should be able to get the right
> behavior with lbpws=1 lbprz=1.
OK, thanks.
^ permalink raw reply
* Re: [PATCH 23/23] block: remove the discard_zeroes_data flag
From: Martin K. Petersen @ 2017-03-30 15:29 UTC (permalink / raw)
To: hch@lst.de
Cc: axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org,
linux-raid-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
martin.petersen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
snitzer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
philipp.reisner-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
dm-devel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Bart Van Assche,
lars.ellenberg-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
shli-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
agk-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ@public.gmane.org
In-Reply-To: <20170330090655.GF12015-jcswGhMUV9g@public.gmane.org>
"hch-jcswGhMUV9g@public.gmane.org" <hch-jcswGhMUV9g@public.gmane.org> writes:
> Jens, any opinion? I'd like to remove it too, but I fear it might
> break things. We could deprecate it first with a warning when read
> and then remove it a few releases down the road.
I know of several apps that check this variable (as opposed to the
ioctl).
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
* Re: [PATCH 12/23] sd: handle REQ_UNMAP
From: Martin K. Petersen @ 2017-03-30 15:28 UTC (permalink / raw)
To: hch@lst.de
Cc: axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org,
linux-raid-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
martin.petersen-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
snitzer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
philipp.reisner-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
dm-devel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
linux-scsi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Bart Van Assche,
lars.ellenberg-63ez5xqkn6DQT0dZR+AlfA@public.gmane.org,
shli-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
agk-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
drbd-dev-cunTk1MwBs8qoQakbn7OcQ@public.gmane.org
In-Reply-To: <20170330090201.GD12015-jcswGhMUV9g@public.gmane.org>
"hch@lst.de" <hch@lst.de> writes:
Christoph,
> On Tue, Mar 28, 2017 at 04:48:55PM +0000, Bart Van Assche wrote:
>> > if (sdp->no_write_same)
>> > return BLKPREP_INVALID;
>> > if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
>>
>> Users can change the provisioning mode from user space from SD_LBP_WS16 into
>> SD_LBP_WS10 so I'm not sure it's safe to skip the (sdkp->ws16 || sector >
>> 0xffffffff || nr_sectors > 0xffff) check if REQ_UNMAP is set.
>
> They can, and if the device has too many sectors that will already cause
> discard to fail,
I'm not sure I understand what you mean by that?
--
Martin K. Petersen Oracle Linux Engineering
_______________________________________________
drbd-dev mailing list
drbd-dev@lists.linbit.com
http://lists.linbit.com/mailman/listinfo/drbd-dev
^ permalink raw reply
* Re: RFC: always use REQ_OP_WRITE_ZEROES for zeroing offload
From: Martin K. Petersen @ 2017-03-30 15:22 UTC (permalink / raw)
To: Mike Snitzer
Cc: Christoph Hellwig, axboe, martin.petersen, agk, shli,
philipp.reisner, lars.ellenberg, linux-block, linux-raid,
dm-devel, linux-scsi, drbd-dev
In-Reply-To: <20170330151257.GA910@redhat.com>
Mike Snitzer <snitzer@redhat.com> writes:
> Would be very useful, particularly for testing, if
> drivers/scsi/scsi_debug.c were updated to support WRITE ZEROES.
There is no WRITE ZEROES in SCSI. You should be able to get the right
behavior with lbpws=1 lbprz=1.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox