* [PATCH 4/7] block: ioctl to zero block ranges
2012-03-02 3:22 Write same support v3 Martin K. Petersen
@ 2012-03-02 3:22 ` Martin K. Petersen
0 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-02 3:22 UTC (permalink / raw)
To: jaxboe, James.Bottomley, snitzer, vgoyal, michaelc
Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Introduce a BLKZEROOUT ioctl which can be used to clear block ranges by
way of blkdev_issue_zeroout().
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
block/ioctl.c | 27 +++++++++++++++++++++++++++
include/linux/fs.h | 1 +
2 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2d..0c6e633 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -132,6 +132,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}
+static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
+ uint64_t len)
+{
+ if (start & 511)
+ return -EINVAL;
+ if (len & 511)
+ return -EINVAL;
+ start >>= 9;
+ len >>= 9;
+
+ if (start + len > (i_size_read(bdev->bd_inode) >> 9))
+ return -EINVAL;
+
+ return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
+}
+
static int put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)arg);
@@ -247,6 +263,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return blk_ioctl_discard(bdev, range[0], range[1],
cmd == BLKSECDISCARD);
}
+ case BLKZEROOUT: {
+ uint64_t range[2];
+
+ if (!(mode & FMODE_WRITE))
+ return -EBADF;
+
+ if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+ return -EFAULT;
+
+ return blk_ioctl_zeroout(bdev, range[0], range[1]);
+ }
case HDIO_GETGEO: {
struct hd_geometry geo;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 386da09..e918ef7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -320,6 +320,7 @@ struct inodes_stat_t {
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
--
1.7.8.3.21.gab8a7
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Write same support v4
@ 2012-03-16 18:43 Martin K. Petersen
2012-03-16 18:43 ` [PATCH 1/7] block: Clean up discard handling logic Martin K. Petersen
` (6 more replies)
0 siblings, 7 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi
Changes since v3:
- Document that the REQ_NOMERGE_FLAGS is used for both bio and request
merging.
- Do size accounting correctly so we don't have to use
blk_add_request_payload(). This also fixes an issue discovered by
Mike: When running over a flaky iSCSI connection we would end up
completing one block and subsequently fail trying to redrive the
remainder of the I/O. An unfortunate side effect of the discrepancy
between the DMA transfer size and the logical block range affected by
the command.
I put a writesame4 branch on:
git://git.kernel.org/pub/scm/linux/kernel/git/mkp/linux.git
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/7] block: Clean up discard handling logic
2012-03-16 18:43 Write same support v4 Martin K. Petersen
@ 2012-03-16 18:43 ` Martin K. Petersen
2012-03-16 18:43 ` [PATCH 2/7] block: Implement support for WRITE SAME Martin K. Petersen
` (5 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Discards were globally marked as mergeable and as a result we had
several code paths that explicitly disabled merging when discard was
set. Mark discard requests as unmergable and remove special-casing of
REQ_DISCARD. The relevant nomerge flags are consolidated in blk_types.h,
and rq_mergeable() and bio_mergeable() have been modified to use them.
bio_is_rw() is used in place of bio_has_data() a few places. This is
done to to distinguish true reads and writes from other fs type requests
that carry a payload (e.g. WRITE SAME).
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
block/blk-core.c | 10 +++++-----
block/blk-merge.c | 22 +---------------------
block/blk.h | 5 ++---
block/elevator.c | 6 ++----
include/linux/bio.h | 23 +++++++++++++++++++++--
include/linux/blk_types.h | 5 +++++
include/linux/blkdev.h | 23 +++++++++++------------
7 files changed, 47 insertions(+), 47 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 3a78b00..5575230 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1545,8 +1545,8 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
- if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
- nr_sectors > queue_max_hw_sectors(q))) {
+ if (likely(bio_is_rw(bio) &&
+ nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
@@ -1698,7 +1698,7 @@ void submit_bio(int rw, struct bio *bio)
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
- if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
+ if (bio_has_data(bio)) {
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
@@ -1744,7 +1744,7 @@ EXPORT_SYMBOL(submit_bio);
*/
int blk_rq_check_limits(struct request_queue *q, struct request *rq)
{
- if (rq->cmd_flags & REQ_DISCARD)
+ if (!rq_mergeable(rq))
return 0;
if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
@@ -2218,7 +2218,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
req->buffer = bio_data(req->bio);
/* update sector only for requests with clear definition of sector */
- if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
+ if (req->cmd_type == REQ_TYPE_FS)
req->__sector += total_bytes >> 9;
/* mixed attributes always follow the first bio */
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 160035f..9172606 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -371,18 +371,6 @@ static int attempt_merge(struct request_queue *q, struct request *req,
return 0;
/*
- * Don't merge file system requests and discard requests
- */
- if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))
- return 0;
-
- /*
- * Don't merge discard requests and secure discard requests
- */
- if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE))
- return 0;
-
- /*
* not contiguous
*/
if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
@@ -474,15 +462,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
- if (!rq_mergeable(rq))
- return false;
-
- /* don't merge file system requests and discard requests */
- if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
- return false;
-
- /* don't merge discard requests and secure discard requests */
- if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
+ if (!rq_mergeable(rq) || !bio_mergeable(bio))
return false;
/* different data direction or already started, don't merge */
diff --git a/block/blk.h b/block/blk.h
index 9c12f80..1983b29 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -187,14 +187,13 @@ static inline int blk_cpu_to_group(int cpu)
*
* a) it's attached to a gendisk, and
* b) the queue had IO stats enabled when this request was started, and
- * c) it's a file system request or a discard request
+ * c) it's a file system request
*/
static inline int blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->cmd_flags & REQ_IO_STAT) &&
- (rq->cmd_type == REQ_TYPE_FS ||
- (rq->cmd_flags & REQ_DISCARD));
+ (rq->cmd_type == REQ_TYPE_FS);
}
/*
diff --git a/block/elevator.c b/block/elevator.c
index f016855..cccfdbf 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -591,8 +591,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
- if (rq->cmd_type == REQ_TYPE_FS ||
- (rq->cmd_flags & REQ_DISCARD)) {
+ if (rq->cmd_type == REQ_TYPE_FS) {
q->end_sector = rq_end_sector(rq);
q->boundary_rq = rq;
}
@@ -634,8 +633,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (elv_attempt_insert_merge(q, rq))
break;
case ELEVATOR_INSERT_SORT:
- BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
- !(rq->cmd_flags & REQ_DISCARD));
+ BUG_ON(rq->cmd_type != REQ_TYPE_FS);
rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
if (rq_mergeable(rq)) {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 129a9c0..cad2a9c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -358,9 +358,28 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
/*
* Check whether this bio carries any data or not. A NULL bio is allowed.
*/
-static inline int bio_has_data(struct bio *bio)
+static inline bool bio_has_data(struct bio *bio)
{
- return bio && bio->bi_io_vec != NULL;
+ if (bio && bio->bi_io_vec)
+ return true;
+
+ return false;
+}
+
+static inline bool bio_is_rw(struct bio *bio)
+{
+ if (!bio_has_data(bio))
+ return false;
+
+ return true;
+}
+
+static inline bool bio_mergeable(struct bio *bio)
+{
+ if (bio->bi_rw & REQ_NOMERGE_FLAGS)
+ return false;
+
+ return true;
}
/*
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4053cbd..bf27ffc 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -170,6 +170,11 @@ enum rq_flag_bits {
REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
+/* This mask is used for both bio and request merge checking */
+#define REQ_NOMERGE_FLAGS \
+ (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | \
+ REQ_DISCARD)
+
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
#define REQ_THROTTLED (1 << __REQ_THROTTLED)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 606cf33..3943933 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -512,8 +512,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_account_rq(rq) \
(((rq)->cmd_flags & REQ_STARTED) && \
- ((rq)->cmd_type == REQ_TYPE_FS || \
- ((rq)->cmd_flags & REQ_DISCARD)))
+ ((rq)->cmd_type == REQ_TYPE_FS))
#define blk_pm_request(rq) \
((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
@@ -569,17 +568,17 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync)
queue_flag_clear(QUEUE_FLAG_ASYNCFULL, q);
}
+static inline bool rq_mergeable(struct request *rq)
+{
+ if (rq->cmd_type != REQ_TYPE_FS)
+ return false;
+
+ if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
+ return false;
+
+ return true;
+}
-/*
- * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
- * it already be started by driver.
- */
-#define RQ_NOMERGE_FLAGS \
- (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
-#define rq_mergeable(rq) \
- (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
- (((rq)->cmd_flags & REQ_DISCARD) || \
- (rq)->cmd_type == REQ_TYPE_FS))
/*
* q->prep_rq_fn return values
--
1.7.7.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/7] block: Implement support for WRITE SAME
2012-03-16 18:43 Write same support v4 Martin K. Petersen
2012-03-16 18:43 ` [PATCH 1/7] block: Clean up discard handling logic Martin K. Petersen
@ 2012-03-16 18:43 ` Martin K. Petersen
2012-03-16 18:43 ` [PATCH 3/7] block: Make blkdev_issue_zeroout use " Martin K. Petersen
` (4 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
The WRITE SAME command supported on some SCSI devices allows the same
block to be efficiently replicated throughout a block range. Only a
single logical block is transferred from the host and the storage device
writes the same data to all blocks described by the I/O.
This patch implements support for WRITE SAME in the block layer. The
blkdev_issue_write_same() function can be used by filesystems and block
drivers to replicate a buffer across a block range. This can be used to
efficiently initialize software RAID devices, etc.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
Documentation/ABI/testing/sysfs-block | 13 ++++++
block/blk-core.c | 14 +++++-
block/blk-lib.c | 74 +++++++++++++++++++++++++++++++++
block/blk-settings.c | 16 +++++++
block/blk-sysfs.c | 13 ++++++
include/linux/bio.h | 3 +
include/linux/blk_types.h | 7 ++-
include/linux/blkdev.h | 16 +++++++
8 files changed, 152 insertions(+), 4 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index c1eb41c..02ed30c 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -206,3 +206,16 @@ Description:
when a discarded area is read the discard_zeroes_data
parameter will be set to one. Otherwise it will be 0 and
the result of reading a discarded area is undefined.
+
+What: /sys/block/<disk>/queue/write_same_max_bytes
+Date: January 2012
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Some devices support a write same operation in which a
+ single data block can be written to a range of several
+ contiguous blocks on storage. This can be used to wipe
+ areas on disk or to initialize drives in a RAID
+ configuration. write_same_max_bytes indicates how many
+ bytes can be written in a single write same command. If
+ write_same_max_bytes is 0, write same is not supported
+ by the device.
diff --git a/block/blk-core.c b/block/blk-core.c
index 5575230..1cd55be 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1593,6 +1593,11 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
+ if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
+ err = -EOPNOTSUPP;
+ goto end_io;
+ }
+
if (blk_throtl_bio(q, bio))
return false; /* throttled, will be resubmitted later */
@@ -1690,8 +1695,6 @@ EXPORT_SYMBOL(generic_make_request);
*/
void submit_bio(int rw, struct bio *bio)
{
- int count = bio_sectors(bio);
-
bio->bi_rw |= rw;
/*
@@ -1699,6 +1702,13 @@ void submit_bio(int rw, struct bio *bio)
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio)) {
+ unsigned int count;
+
+ if (unlikely(rw & REQ_WRITE_SAME))
+ count = bdev_logical_block_size(bio->bi_bdev) >> 9;
+ else
+ count = bio_sectors(bio);
+
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 2b461b4..e6cb5b4 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -115,6 +115,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
EXPORT_SYMBOL(blkdev_issue_discard);
/**
+ * blkdev_issue_write_same - queue a write same operation
+ * @bdev: target blockdev
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @page: page containing data to write
+ *
+ * Description:
+ * Issue a write same request for the sectors in question.
+ */
+int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask,
+ struct page *page)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_write_same_sectors;
+ struct bio_batch bb;
+ struct bio *bio;
+ int ret = 0;
+
+ if (!q)
+ return -ENXIO;
+
+ max_write_same_sectors = q->limits.max_write_same_sectors;
+
+ if (max_write_same_sectors == 0)
+ return -EOPNOTSUPP;
+
+ atomic_set(&bb.done, 1);
+ bb.flags = 1 << BIO_UPTODATE;
+ bb.wait = &wait;
+
+ while (nr_sects) {
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ bio->bi_sector = sector;
+ bio->bi_end_io = bio_batch_end_io;
+ bio->bi_bdev = bdev;
+ bio->bi_private = &bb;
+ bio->bi_vcnt = 1;
+ bio->bi_io_vec->bv_page = page;
+ bio->bi_io_vec->bv_offset = 0;
+ bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
+
+ if (nr_sects > max_write_same_sectors) {
+ bio->bi_size = max_write_same_sectors << 9;
+ nr_sects -= max_write_same_sectors;
+ sector += max_write_same_sectors;
+ } else {
+ bio->bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+
+ atomic_inc(&bb.done);
+ submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
+ }
+
+ /* Wait for bios in-flight */
+ if (!atomic_dec_and_test(&bb.done))
+ wait_for_completion(&wait);
+
+ if (!test_bit(BIO_UPTODATE, &bb.flags))
+ ret = -ENOTSUPP;
+
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_write_same);
+
+/**
* blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
* @sector: start sector
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d3234fc..5680b91 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+ lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
@@ -143,6 +144,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->discard_zeroes_data = 1;
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
+ lim->max_write_same_sectors = UINT_MAX;
lim->max_sectors = BLK_DEF_MAX_SECTORS;
}
@@ -287,6 +289,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
+ * blk_queue_max_write_same_sectors - set max sectors for a single write same
+ * @q: the request queue for the device
+ * @max_write_same_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_write_same_sectors(struct request_queue *q,
+ unsigned int max_write_same_sectors)
+{
+ q->limits.max_write_same_sectors = max_write_same_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
+
+/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
@@ -511,6 +525,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+ t->max_write_same_sectors = min(t->max_write_same_sectors,
+ b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cf15001..5b85d91 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -161,6 +161,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(queue_discard_zeroes_data(q), page);
}
+static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long)q->limits.max_write_same_sectors << 9);
+}
+
+
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
@@ -357,6 +364,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.show = queue_discard_zeroes_data_show,
};
+static struct queue_sysfs_entry queue_write_same_max_entry = {
+ .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+ .show = queue_write_same_max_show,
+};
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@@ -404,6 +416,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
+ &queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index cad2a9c..df4b91e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -371,6 +371,9 @@ static inline bool bio_is_rw(struct bio *bio)
if (!bio_has_data(bio))
return false;
+ if (bio->bi_rw & REQ_WRITE_SAME)
+ return false;
+
return true;
}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index bf27ffc..7718e69 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -124,6 +124,7 @@ enum rq_flag_bits {
__REQ_PRIO, /* boost priority in cfq */
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
+ __REQ_WRITE_SAME, /* write same block many times */
__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_FUA, /* forced unit access */
@@ -161,19 +162,21 @@ enum rq_flag_bits {
#define REQ_META (1 << __REQ_META)
#define REQ_PRIO (1 << __REQ_PRIO)
#define REQ_DISCARD (1 << __REQ_DISCARD)
+#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
- REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
+ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
+ REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
/* This mask is used for both bio and request merge checking */
#define REQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | \
- REQ_DISCARD)
+ REQ_DISCARD | REQ_WRITE_SAME)
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
#define REQ_THROTTLED (1 << __REQ_THROTTLED)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3943933..8c97ee9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -254,6 +254,7 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
+ unsigned int max_write_same_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -831,6 +832,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
+extern void blk_queue_max_write_same_sectors(struct request_queue *q,
+ unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -955,6 +958,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
+extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, void *buffer,
+ unsigned int length);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
@@ -1122,6 +1128,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
return queue_discard_zeroes_data(bdev_get_queue(bdev));
}
+static inline unsigned int bdev_write_same(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return q->limits.max_write_same_sectors;
+
+ return 0;
+}
+
static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
--
1.7.7.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/7] block: Make blkdev_issue_zeroout use WRITE SAME
2012-03-16 18:43 Write same support v4 Martin K. Petersen
2012-03-16 18:43 ` [PATCH 1/7] block: Clean up discard handling logic Martin K. Petersen
2012-03-16 18:43 ` [PATCH 2/7] block: Implement support for WRITE SAME Martin K. Petersen
@ 2012-03-16 18:43 ` Martin K. Petersen
2012-03-16 18:43 ` [PATCH 4/7] block: ioctl to zero block ranges Martin K. Petersen
` (3 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
If the device supports WRITE SAME, use that to optimize zeroing of
blocks. If the device does not support WRITE SAME or if the operation
fails, fall back to writing zeroes the old-fashioned way.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
block/blk-lib.c | 31 ++++++++++++++++++++++++++++++-
include/linux/blkdev.h | 3 +--
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index e6cb5b4..5a8d026 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(blkdev_issue_write_same);
* Generate and issue number of bios with zerofiled pages.
*/
-int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
{
int ret;
@@ -249,4 +249,33 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return ret;
}
+
+/**
+ * blkdev_issue_zeroout - zero-fill a block range
+ * @bdev: blockdev to write
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Generate and issue number of bios with zerofiled pages.
+ */
+
+int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask)
+{
+ if (bdev_write_same(bdev)) {
+ unsigned char bdn[BDEVNAME_SIZE];
+
+ if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
+ ZERO_PAGE(0)))
+ return 0;
+
+ bdevname(bdev, bdn);
+ printk(KERN_ERR "%s: WRITE SAME failed. Manually zeroing.\n",
+ bdn);
+ }
+
+ return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
+}
EXPORT_SYMBOL(blkdev_issue_zeroout);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8c97ee9..92956b7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -959,8 +959,7 @@ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, void *buffer,
- unsigned int length);
+ sector_t nr_sects, gfp_t gfp_mask, struct page *page);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
--
1.7.7.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/7] block: ioctl to zero block ranges
2012-03-16 18:43 Write same support v4 Martin K. Petersen
` (2 preceding siblings ...)
2012-03-16 18:43 ` [PATCH 3/7] block: Make blkdev_issue_zeroout use " Martin K. Petersen
@ 2012-03-16 18:43 ` Martin K. Petersen
2012-03-16 18:43 ` [PATCH 5/7] scsi: Add a report opcode helper Martin K. Petersen
` (2 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Introduce a BLKZEROOUT ioctl which can be used to clear block ranges by
way of blkdev_issue_zeroout().
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
block/ioctl.c | 27 +++++++++++++++++++++++++++
include/linux/fs.h | 1 +
2 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2d..0c6e633 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -132,6 +132,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}
+static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
+ uint64_t len)
+{
+ if (start & 511)
+ return -EINVAL;
+ if (len & 511)
+ return -EINVAL;
+ start >>= 9;
+ len >>= 9;
+
+ if (start + len > (i_size_read(bdev->bd_inode) >> 9))
+ return -EINVAL;
+
+ return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
+}
+
static int put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)arg);
@@ -247,6 +263,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return blk_ioctl_discard(bdev, range[0], range[1],
cmd == BLKSECDISCARD);
}
+ case BLKZEROOUT: {
+ uint64_t range[2];
+
+ if (!(mode & FMODE_WRITE))
+ return -EBADF;
+
+ if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+ return -EFAULT;
+
+ return blk_ioctl_zeroout(bdev, range[0], range[1]);
+ }
case HDIO_GETGEO: {
struct hd_geometry geo;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 386da09..e918ef7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -320,6 +320,7 @@ struct inodes_stat_t {
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
--
1.7.7.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 5/7] scsi: Add a report opcode helper
2012-03-16 18:43 Write same support v4 Martin K. Petersen
` (3 preceding siblings ...)
2012-03-16 18:43 ` [PATCH 4/7] block: ioctl to zero block ranges Martin K. Petersen
@ 2012-03-16 18:43 ` Martin K. Petersen
2012-03-16 18:43 ` [PATCH 6/7] sd: Implement support for WRITE SAME Martin K. Petersen
2012-03-16 18:43 ` [PATCH 7/7] sd: Use sd_ prefix for flush and discard functions Martin K. Petersen
6 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
The REPORT SUPPORTED OPERATION CODES command can be used to query
whether a given opcode is supported by a device. Add a helper function
that allows us to look up commands.
We only issue RSOC if the device reports compliance with SPC-3 or
later. But to err on the side of caution we disable the command for ATA,
FireWire and USB.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/ata/libata-scsi.c | 1 +
drivers/firewire/sbp2.c | 1 +
drivers/scsi/scsi.c | 45 ++++++++++++++++++++++++++++++++++++++++
drivers/usb/storage/scsiglue.c | 3 ++
include/scsi/scsi_device.h | 3 ++
5 files changed, 53 insertions(+), 0 deletions(-)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 508a60b..b6db28f 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1052,6 +1052,7 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev)
{
sdev->use_10_for_rw = 1;
sdev->use_10_for_ms = 1;
+ sdev->no_report_opcodes = 1;
/* Schedule policy is determined by ->qc_defer() callback and
* it needs to see every deferred qc. Set dev_blocked to 1 to
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 80e95aa..d956dd38 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1517,6 +1517,7 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
struct sbp2_logical_unit *lu = sdev->hostdata;
sdev->use_10_for_rw = 1;
+ sdev->no_report_opcodes = 1;
if (sbp2_param_exclusive_login)
sdev->manage_start_stop = 1;
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 2aeb2e9..8d00214 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -54,6 +54,7 @@
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <asm/unaligned.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -1063,6 +1064,50 @@ int scsi_get_vpd_page(struct scsi_device *sdev, u8 page, unsigned char *buf,
EXPORT_SYMBOL_GPL(scsi_get_vpd_page);
/**
+ * scsi_report_opcode - Find out if a given command opcode is supported
+ * @sdp: scsi device to query
+ * @buffer: scratch buffer (must be at least 20 bytes long)
+ * @len: length of buffer
+ * @opcode: opcode for command to look up
+ *
+ * Uses the REPORT SUPPORTED OPERATION CODES to look up the given
+ * opcode. Returns 0 if RSOC fails or if the command opcode is
+ * unsupported. Returns 1 if the device claims to support the command.
+ */
+int scsi_report_opcode(struct scsi_device *sdp, unsigned char *buffer,
+ unsigned int len, unsigned char opcode)
+{
+ unsigned char cmd[16];
+ struct scsi_sense_hdr sshdr;
+ int result;
+
+ if (sdp->no_report_opcodes || sdp->scsi_level < SCSI_SPC_3)
+ return 0;
+
+ memset(cmd, 0, 16);
+ cmd[0] = MAINTENANCE_IN;
+ cmd[1] = MI_REPORT_SUPPORTED_OPERATION_CODES;
+ cmd[2] = 1; /* One command format */
+ cmd[3] = opcode;
+ put_unaligned_be32(len, &cmd[6]);
+ memset(buffer, 0, len);
+
+ result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, buffer, len,
+ &sshdr, 30 * HZ, 3, NULL);
+
+ if (result && scsi_sense_valid(&sshdr) &&
+ sshdr.sense_key == ILLEGAL_REQUEST &&
+ (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00)
+ return 0;
+
+ if ((buffer[1] & 3) == 3) /* Command supported */
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(scsi_report_opcode);
+
+/**
* scsi_device_get - get an additional reference to a scsi_device
* @sdev: device to get a reference to
*
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 13b8bcd..61178b8 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -197,6 +197,9 @@ static int slave_configure(struct scsi_device *sdev)
* page x08, so we will skip it. */
sdev->skip_ms_page_8 = 1;
+ /* Do not attempt to use REPORT SUPPORTED OPERATION CODES */
+ sdev->no_report_opcodes = 1;
+
/* Some disks return the total number of blocks in response
* to READ CAPACITY rather than the highest block number.
* If this device makes that mistake, tell the sd driver. */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 77273f2..59f31dc 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -134,6 +134,7 @@ struct scsi_device {
* because we did a bus reset. */
unsigned use_10_for_rw:1; /* first try 10-byte read / write */
unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
+ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */
unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */
unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */
unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */
@@ -354,6 +355,8 @@ extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout,
int retries, struct scsi_sense_hdr *sshdr);
extern int scsi_get_vpd_page(struct scsi_device *, u8 page, unsigned char *buf,
int buf_len);
+extern int scsi_report_opcode(struct scsi_device *sdp, unsigned char *buffer,
+ unsigned int len, unsigned char opcode);
extern int scsi_device_set_state(struct scsi_device *sdev,
enum scsi_device_state state);
extern struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type,
--
1.7.7.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 6/7] sd: Implement support for WRITE SAME
2012-03-16 18:43 Write same support v4 Martin K. Petersen
` (4 preceding siblings ...)
2012-03-16 18:43 ` [PATCH 5/7] scsi: Add a report opcode helper Martin K. Petersen
@ 2012-03-16 18:43 ` Martin K. Petersen
2012-03-16 18:43 ` [PATCH 7/7] sd: Use sd_ prefix for flush and discard functions Martin K. Petersen
6 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Implement support for WRITE SAME(10) and WRITE SAME(16) in the SCSI disk
driver.
- We set the default maximum to 0xFFFF because there are several
devices out there that only support two-byte block counts even with
WRITE SAME(16). We only enable transfers bigger than 0xFFFF if the
device explicitly reports MAXIMUM WRITE SAME LENGTH in the BLOCK
LIMITS VPD.
- max_write_same_blocks can be overriden per-device basis in sysfs.
- The UNMAP discovery heuristics remain unchanged but the discard
limits are tweaked to match the "real" WRITE SAME commands.
- In the error handling logic we now distinguish between WRITE SAME
with and without UNMAP set.
The discovery process heuristics are:
- If the device reports a SCSI level of SPC-3 or greater we'll issue
READ SUPPORTED OPERATION CODES to find out whether WRITE SAME(16) is
supported. If that's the case we will use it.
- If the device supports the block limits VPD and reports a MAXIMUM
WRITE SAME LENGTH bigger than 0xFFFF we will use WRITE SAME(16).
- Otherwise we will use WRITE SAME(10) unless the target LBA is beyond
0xFFFFFFFF or the block count exceeds 0xFFFF.
- no_write_same is set for ATA, FireWire and USB.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
---
drivers/ata/libata-scsi.c | 1 +
drivers/firewire/sbp2.c | 1 +
drivers/scsi/scsi_lib.c | 22 ++++-
drivers/scsi/sd.c | 182 +++++++++++++++++++++++++++++++++++++--
drivers/scsi/sd.h | 7 ++
drivers/usb/storage/scsiglue.c | 3 +
include/scsi/scsi_device.h | 1 +
7 files changed, 202 insertions(+), 15 deletions(-)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b6db28f..dde907b 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1053,6 +1053,7 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev)
sdev->use_10_for_rw = 1;
sdev->use_10_for_ms = 1;
sdev->no_report_opcodes = 1;
+ sdev->no_write_same = 1;
/* Schedule policy is determined by ->qc_defer() callback and
* it needs to see every deferred qc. Set dev_blocked to 1 to
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index d956dd38..52b2bac 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1518,6 +1518,7 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
sdev->use_10_for_rw = 1;
sdev->no_report_opcodes = 1;
+ sdev->no_write_same = 1;
if (sbp2_param_exclusive_login)
sdev->manage_start_stop = 1;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b2c95db..560b63f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -874,11 +874,23 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
action = ACTION_FAIL;
error = -EILSEQ;
/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
- } else if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
- (cmd->cmnd[0] == UNMAP ||
- cmd->cmnd[0] == WRITE_SAME_16 ||
- cmd->cmnd[0] == WRITE_SAME)) {
- description = "Discard failure";
+ } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
+ switch(cmd->cmnd[0]) {
+ case UNMAP:
+ description = "Discard failure";
+ break;
+ case WRITE_SAME:
+ case WRITE_SAME_16:
+ if (cmd->cmnd[1] & 0x8)
+ description = "Discard failure";
+ else
+ description =
+ "Write same failure";
+ break;
+ default:
+ description = "Invalid command failure";
+ break;
+ }
action = ACTION_FAIL;
} else
action = ACTION_FAIL;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c691fb5..26c33db 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -98,6 +98,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
#endif
static void sd_config_discard(struct scsi_disk *, unsigned int);
+static void sd_config_write_same(struct scsi_disk *);
static int sd_revalidate_disk(struct gendisk *);
static void sd_unlock_native_capacity(struct gendisk *disk);
static int sd_probe(struct device *);
@@ -346,6 +347,45 @@ sd_store_provisioning_mode(struct device *dev, struct device_attribute *attr,
return count;
}
+static ssize_t
+sd_show_write_same_blocks(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+ return snprintf(buf, 20, "%u\n", sdkp->max_ws_blocks);
+}
+
+static ssize_t
+sd_store_write_same_blocks(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+ struct scsi_device *sdp = sdkp->device;
+ unsigned long max;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ if (sdp->type != TYPE_DISK)
+ return -EINVAL;
+
+ err = kstrtoul(buf, 10, &max);
+
+ if (err)
+ return err;
+
+ if (max == 0)
+ sdp->no_write_same = 1;
+ else if (max <= SD_MAX_WS16_BLOCKS)
+ sdkp->max_ws_blocks = max;
+
+ sd_config_write_same(sdkp);
+
+ return count;
+}
+
static struct device_attribute sd_disk_attrs[] = {
__ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type,
sd_store_cache_type),
@@ -360,6 +400,8 @@ static struct device_attribute sd_disk_attrs[] = {
__ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL),
__ATTR(provisioning_mode, S_IRUGO|S_IWUSR, sd_show_provisioning_mode,
sd_store_provisioning_mode),
+ __ATTR(max_write_same_blocks, S_IRUGO|S_IWUSR,
+ sd_show_write_same_blocks, sd_store_write_same_blocks),
__ATTR_NULL,
};
@@ -505,19 +547,23 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
return;
case SD_LBP_UNMAP:
- max_blocks = min_not_zero(sdkp->max_unmap_blocks, 0xffffffff);
+ max_blocks = min_not_zero(sdkp->max_unmap_blocks,
+ (u32)SD_MAX_WS16_BLOCKS);
break;
case SD_LBP_WS16:
- max_blocks = min_not_zero(sdkp->max_ws_blocks, 0xffffffff);
+ max_blocks = min_not_zero(sdkp->max_ws_blocks,
+ (u32)SD_MAX_WS16_BLOCKS);
break;
case SD_LBP_WS10:
- max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff);
+ max_blocks = min_not_zero(sdkp->max_ws_blocks,
+ (u32)SD_MAX_WS10_BLOCKS);
break;
case SD_LBP_ZERO:
- max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff);
+ max_blocks = min_not_zero(sdkp->max_ws_blocks,
+ (u32)SD_MAX_WS10_BLOCKS);
q->limits.discard_zeroes_data = 1;
break;
}
@@ -615,6 +661,78 @@ out:
return ret;
}
+static void sd_config_write_same(struct scsi_disk *sdkp)
+{
+ struct request_queue *q = sdkp->disk->queue;
+ unsigned int logical_block_size = sdkp->device->sector_size;
+ unsigned int blocks = 0;
+
+ if (sdkp->device->no_write_same) {
+ sdkp->max_ws_blocks = 0;
+ goto out;
+ }
+
+ /* Some devices can not handle block counts above 0xffff despite
+ * supporting WRITE SAME(16). Consequently we default to 64k
+ * blocks per I/O unless the device explicitly advertises a
+ * bigger limit.
+ */
+ if (sdkp->max_ws_blocks == 0)
+ sdkp->max_ws_blocks = SD_MAX_WS10_BLOCKS;
+
+ if (sdkp->ws16 || sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS)
+ blocks = min_not_zero(sdkp->max_ws_blocks,
+ (u32)SD_MAX_WS16_BLOCKS);
+ else
+ blocks = min_not_zero(sdkp->max_ws_blocks,
+ (u32)SD_MAX_WS10_BLOCKS);
+
+out:
+ blk_queue_max_write_same_sectors(q, blocks * (logical_block_size >> 9));
+}
+
+/**
+ * sd_setup_write_same_cmnd - write the same data to multiple blocks
+ * @sdp: scsi device to operate one
+ * @rq: Request to prepare
+ *
+ * Will issue either WRITE SAME(10) or WRITE SAME(16) depending on
+ * preference indicated by target device.
+ **/
+static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq)
+{
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ struct bio *bio = rq->bio;
+ sector_t sector = bio->bi_sector;
+ unsigned int nr_sectors = bio_sectors(bio);
+
+ if (sdkp->device->no_write_same)
+ return BLKPREP_KILL;
+
+ BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size);
+
+ sector >>= ilog2(sdp->sector_size) - 9;
+ nr_sectors >>= ilog2(sdp->sector_size) - 9;
+
+ rq->__data_len = sdp->sector_size;
+ rq->timeout = SD_WRITE_SAME_TIMEOUT;
+ memset(rq->cmd, 0, rq->cmd_len);
+
+ if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) {
+ rq->cmd_len = 16;
+ rq->cmd[0] = WRITE_SAME_16;
+ put_unaligned_be64(sector, &rq->cmd[2]);
+ put_unaligned_be32(nr_sectors, &rq->cmd[10]);
+ } else {
+ rq->cmd_len = 10;
+ rq->cmd[0] = WRITE_SAME;
+ put_unaligned_be32(sector, &rq->cmd[2]);
+ put_unaligned_be16(nr_sectors, &rq->cmd[7]);
+ }
+
+ return scsi_setup_blk_pc_cmnd(sdp, rq);
+}
+
static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
{
rq->timeout = SD_FLUSH_TIMEOUT;
@@ -660,6 +778,9 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
if (rq->cmd_flags & REQ_DISCARD) {
ret = scsi_setup_discard_cmnd(sdp, rq);
goto out;
+ } else if (rq->cmd_flags & REQ_WRITE_SAME) {
+ ret = sd_setup_write_same_cmnd(sdp, rq);
+ goto out;
} else if (rq->cmd_flags & REQ_FLUSH) {
ret = scsi_setup_flush_cmnd(sdp, rq);
goto out;
@@ -1375,13 +1496,31 @@ static int sd_done(struct scsi_cmnd *SCpnt)
unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
struct scsi_sense_hdr sshdr;
struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
+ struct request *req = SCpnt->request;
int sense_valid = 0;
int sense_deferred = 0;
unsigned char op = SCpnt->cmnd[0];
+ unsigned char unmap = SCpnt->cmnd[1] & 8;
if ((SCpnt->request->cmd_flags & REQ_DISCARD) && !result)
scsi_set_resid(SCpnt, 0);
+ if (SCpnt->request->cmd_flags & REQ_WRITE_SAME) {
+ /*
+ * Complete entire block range, not just the single
+ * block that was actually transferred.
+ */
+ req->__data_len = req->bio->bi_size;
+
+ if (!result) {
+ good_bytes = req->__data_len;
+ scsi_set_resid(SCpnt, 0);
+ } else {
+ good_bytes = 0;
+ scsi_set_resid(SCpnt, req->__data_len);
+ }
+ }
+
if (result) {
sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
if (sense_valid)
@@ -1427,9 +1566,25 @@ static int sd_done(struct scsi_cmnd *SCpnt)
if (sshdr.asc == 0x10) /* DIX: Host detected corruption */
good_bytes = sd_completed_bytes(SCpnt);
/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
- if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
- (op == UNMAP || op == WRITE_SAME_16 || op == WRITE_SAME))
- sd_config_discard(sdkp, SD_LBP_DISABLE);
+ if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
+ switch (op) {
+ case UNMAP:
+ sd_config_discard(sdkp, SD_LBP_DISABLE);
+ break;
+ case WRITE_SAME_16:
+ case WRITE_SAME:
+ if (unmap)
+ sd_config_discard(sdkp, SD_LBP_DISABLE);
+ else {
+ sdkp->device->no_write_same = 1;
+ sd_config_write_same(sdkp);
+
+ good_bytes = 0;
+ req->__data_len = req->bio->bi_size;
+ req->cmd_flags |= REQ_QUIET;
+ }
+ }
+ }
break;
default:
break;
@@ -2247,9 +2402,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
if (buffer[3] == 0x3c) {
unsigned int lba_count, desc_count;
- sdkp->max_ws_blocks =
- (u32) min_not_zero(get_unaligned_be64(&buffer[36]),
- (u64)0xffffffff);
+ sdkp->max_ws_blocks = (u32)get_unaligned_be64(&buffer[36]);
if (!sdkp->lbpme)
goto out;
@@ -2342,6 +2495,13 @@ static void sd_read_block_provisioning(struct scsi_disk *sdkp)
kfree(buffer);
}
+static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer)
+{
+ if (scsi_report_opcode(sdkp->device, buffer, SD_BUF_SIZE,
+ WRITE_SAME_16))
+ sdkp->ws16 = 1;
+}
+
static int sd_try_extended_inquiry(struct scsi_device *sdp)
{
/*
@@ -2401,6 +2561,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_read_write_protect_flag(sdkp, buffer);
sd_read_cache_type(sdkp, buffer);
sd_read_app_tag_own(sdkp, buffer);
+ sd_read_write_same(sdkp, buffer);
}
sdkp->first_scan = 0;
@@ -2418,6 +2579,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
blk_queue_flush(sdkp->disk->queue, flush);
set_capacity(disk, sdkp->capacity);
+ sd_config_write_same(sdkp);
kfree(buffer);
out:
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4163f29..5923e42 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -14,6 +14,7 @@
#define SD_TIMEOUT (30 * HZ)
#define SD_MOD_TIMEOUT (75 * HZ)
#define SD_FLUSH_TIMEOUT (60 * HZ)
+#define SD_WRITE_SAME_TIMEOUT (120 * HZ)
/*
* Number of allowed retries
@@ -38,6 +39,11 @@ enum {
};
enum {
+ SD_MAX_WS10_BLOCKS = 0xffff,
+ SD_MAX_WS16_BLOCKS = 0x7fffff,
+};
+
+enum {
SD_LBP_FULL = 0, /* Full logical block provisioning */
SD_LBP_UNMAP, /* Use UNMAP command */
SD_LBP_WS16, /* Use WRITE SAME(16) with UNMAP bit */
@@ -74,6 +80,7 @@ struct scsi_disk {
unsigned lbpws : 1;
unsigned lbpws10 : 1;
unsigned lbpvpd : 1;
+ unsigned ws16 : 1;
};
#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 61178b8..7aaf5d4 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -200,6 +200,9 @@ static int slave_configure(struct scsi_device *sdev)
/* Do not attempt to use REPORT SUPPORTED OPERATION CODES */
sdev->no_report_opcodes = 1;
+ /* Do not attempt to use WRITE SAME */
+ sdev->no_write_same = 1;
+
/* Some disks return the total number of blocks in response
* to READ CAPACITY rather than the highest block number.
* If this device makes that mistake, tell the sd driver. */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 59f31dc..c1d8c2f 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -135,6 +135,7 @@ struct scsi_device {
unsigned use_10_for_rw:1; /* first try 10-byte read / write */
unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */
+ unsigned no_write_same:1; /* no WRITE SAME command */
unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */
unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */
unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */
--
1.7.7.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 7/7] sd: Use sd_ prefix for flush and discard functions
2012-03-16 18:43 Write same support v4 Martin K. Petersen
` (5 preceding siblings ...)
2012-03-16 18:43 ` [PATCH 6/7] sd: Implement support for WRITE SAME Martin K. Petersen
@ 2012-03-16 18:43 ` Martin K. Petersen
6 siblings, 0 replies; 9+ messages in thread
From: Martin K. Petersen @ 2012-03-16 18:43 UTC (permalink / raw)
To: jaxboe, jbottomley; +Cc: linux-scsi, Martin K. Petersen
From: "Martin K. Petersen" <martin.petersen@oracle.com>
The commands to set up flush and discard operations used a scsi_ prefix
despite being local to the sd driver. Use an sd_ prefix instead.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/scsi/sd.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 26c33db..0c5c4b2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -575,14 +575,14 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
}
/**
- * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device
+ * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device
* @sdp: scsi device to operate one
* @rq: Request to prepare
*
* Will issue either UNMAP or WRITE SAME(16) depending on preference
* indicated by target device.
**/
-static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
+static int sd_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
{
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
struct bio *bio = rq->bio;
@@ -733,7 +733,7 @@ static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq)
return scsi_setup_blk_pc_cmnd(sdp, rq);
}
-static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
+static int sd_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
{
rq->timeout = SD_FLUSH_TIMEOUT;
rq->retries = SD_MAX_RETRIES;
@@ -776,13 +776,13 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
* block PC requests to make life easier.
*/
if (rq->cmd_flags & REQ_DISCARD) {
- ret = scsi_setup_discard_cmnd(sdp, rq);
+ ret = sd_setup_discard_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_WRITE_SAME) {
ret = sd_setup_write_same_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_FLUSH) {
- ret = scsi_setup_flush_cmnd(sdp, rq);
+ ret = sd_setup_flush_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
ret = scsi_setup_blk_pc_cmnd(sdp, rq);
--
1.7.7.6
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2012-03-16 18:43 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-16 18:43 Write same support v4 Martin K. Petersen
2012-03-16 18:43 ` [PATCH 1/7] block: Clean up discard handling logic Martin K. Petersen
2012-03-16 18:43 ` [PATCH 2/7] block: Implement support for WRITE SAME Martin K. Petersen
2012-03-16 18:43 ` [PATCH 3/7] block: Make blkdev_issue_zeroout use " Martin K. Petersen
2012-03-16 18:43 ` [PATCH 4/7] block: ioctl to zero block ranges Martin K. Petersen
2012-03-16 18:43 ` [PATCH 5/7] scsi: Add a report opcode helper Martin K. Petersen
2012-03-16 18:43 ` [PATCH 6/7] sd: Implement support for WRITE SAME Martin K. Petersen
2012-03-16 18:43 ` [PATCH 7/7] sd: Use sd_ prefix for flush and discard functions Martin K. Petersen
-- strict thread matches above, loose matches on Subject: below --
2012-03-02 3:22 Write same support v3 Martin K. Petersen
2012-03-02 3:22 ` [PATCH 4/7] block: ioctl to zero block ranges Martin K. Petersen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).