From: David Woodhouse <dwmw2@infradead.org>
To: Jens Axboe <jens.axboe@oracle.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Ric Wheeler <ricwheeler@gmail.com>,
linux-fsdevel@vger.kernel.org, gilad@codefidence.com
Subject: Re: [RFC] 'discard sectors' block request.
Date: Tue, 05 Aug 2008 17:29:01 +0100 [thread overview]
Message-ID: <1217953741.3454.784.camel@pmac.infradead.org> (raw)
In-Reply-To: <20080805114210.GW20055@kernel.dk>
On Tue, 2008-08-05 at 13:42 +0200, Jens Axboe wrote:
<some stuff>
Incremental patch below; I'll commit it to a git tree in discrete
patches, and publish those.
You can just pass NULL as your end_io function to blkdev_issue_discard()
and it'll set it to bio_put() for you.
We'll probably want to make the elevator capable of merging discard
requests -- and even letting them cross real read/write requests for
other sectors, and just dropping them if there's a subsequent write
request covering the same region, etc.
diff -u b/block/blk-barrier.c b/block/blk-barrier.c
--- b/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -320,17 +321,16 @@
* @bdev: blockdev to issue discard for
* @sector: start sector
* @nr_sects: number of sectors to discard
- * @error_sector: error sector
+ * @end_io: end_io function (or NULL)
*
* Description:
- * Issue a discard request for the sectors in question. Caller can supply
- * room for storing the error offset in case of a discard error, if they
- * wish to.
+ * Issue a discard request for the sectors in question. Caller can pass an
+ * end_io function (which must call bio_put()), if they really want to see
+ * the outcome. Most callers probably won't, and can just pass NULL.
*/
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- unsigned nr_sects, sector_t *error_sector)
+ unsigned nr_sects, bio_end_io_t end_io)
{
- DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q;
struct bio *bio;
int ret;
@@ -343,33 +343,24 @@
return -ENXIO;
+ /* Many callers won't care at all about the outcome. After all,
+ this is just a hint to the underlying device. They'll just
+ ignore errors completely. So the end_io function can be just
+ a call to bio_put() */
+ if (!end_io)
+ end_io = (void *)&bio_put;
+
+ if (!q->discard_fn)
+ return -EOPNOTSUPP;
+
bio = bio_alloc(GFP_KERNEL, 0);
if (!bio)
return -ENOMEM;
- bio->bi_end_io = bio_end_empty_barrier;
- bio->bi_private = &wait;
+ bio->bi_end_io = end_io;
bio->bi_bdev = bdev;
bio->bi_sector = sector;
bio->bi_size = nr_sects << 9;
submit_bio(1 << BIO_RW_DISCARD, bio);
-
- wait_for_completion(&wait);
-
- /*
- * The driver must store the error location in ->bi_sector, if
- * it supports it. For non-stacked drivers, this should be copied
- * from rq->sector.
- */
- if (error_sector)
- *error_sector = bio->bi_sector;
-
- ret = 0;
- if (bio_flagged(bio, BIO_EOPNOTSUPP))
- ret = -EOPNOTSUPP;
- else if (!bio_flagged(bio, BIO_UPTODATE))
- ret = -EIO;
-
- bio_put(bio);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(blkdev_issue_discard);
diff -u b/block/blk-core.c b/block/blk-core.c
--- b/block/blk-core.c
+++ b/block/blk-core.c
@@ -1502,10 +1501,9 @@
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
- if (!bio_empty_barrier(bio) && !bio_discard(bio)) {
+ if (!bio_dataless(bio)) {
BIO_BUG_ON(!bio->bi_size);
- BIO_BUG_ON(!bio->bi_io_vec);
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
@@ -1576,12 +1574,13 @@
int nbytes;
/*
- * For an empty barrier request or sector discard request, the
- * low level driver must store a potential error location in
- * ->sector. We pass that back up in ->bi_sector.
+ * For an empty barrier request, the low level driver must
+ * store a potential error location in ->sector. We pass
+ * that back up in ->bi_sector.
*/
- if (blk_empty_barrier(req) || blk_discard_rq(req))
+ if (blk_empty_barrier(req))
bio->bi_sector = req->sector;
+
if (nr_bytes >= bio->bi_size) {
req->bio = bio->bi_next;
nbytes = bio->bi_size;
diff -u b/fs/fat/fatent.c b/fs/fat/fatent.c
--- b/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -528,15 +528,6 @@
return err;
}
-static void fat_discard_cluster(struct super_block *sb, int cluster)
-{
- struct msdos_sb_info *sbi = MSDOS_SB(sb);
- struct block_device *bdev = sb->s_bdev;
- unsigned long sector = fat_clus_to_blknr(sbi, cluster);
-
- blkdev_issue_discard(bdev, sector, sbi->sec_per_clus, NULL);
-}
-
int fat_free_clusters(struct inode *inode, int cluster)
{
struct super_block *sb = inode->i_sb;
@@ -550,8 +541,11 @@
fatent_init(&fatent);
lock_fat(sbi);
do {
- fat_discard_cluster(sb, cluster);
-
+ /* Issue discard for the sectors we no longer care about */
+ blkdev_issue_discard(sb->s_bdev,
+ fat_clus_to_blknr(sbi, cluster),
+ sbi->sec_per_clus, NULL);
+
cluster = fat_ent_read(inode, &fatent, cluster);
if (cluster < 0) {
err = cluster;
diff -u b/include/linux/bio.h b/include/linux/bio.h
--- b/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -191,6 +191,8 @@
#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD))
#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size)
+#define bio_dataless(bio) (!(bio)->bi_io_vec)
+
static inline unsigned int bio_cur_sectors(struct bio *bio)
{
if (unlikely(bio_discard(bio)))
diff -u b/include/linux/blkdev.h b/include/linux/blkdev.h
--- b/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -546,6 +546,7 @@
#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
/* rq->queuelist of dequeued request must be list_empty() */
#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
+#define blk_dataless_rq(rq) (!(rq)->buffer)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
@@ -836,7 +837,7 @@
}
extern int blkdev_issue_flush(struct block_device *, sector_t *);
-extern int blkdev_issue_discard(struct block_device *, sector_t, unsigned, sector_t *);
+extern int blkdev_issue_discard(struct block_device *, sector_t, unsigned, bio_end_io_t *);
/*
* command filter functions
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@intel.com Intel Corporation
next prev parent reply other threads:[~2008-08-05 16:29 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <488B7281.4020007@gmail.com>
[not found] ` <20080726130200.f541e604.akpm@linux-foundation.org>
2008-08-05 1:45 ` [RFC] 'discard sectors' block request David Woodhouse
2008-08-05 1:59 ` Andrew Morton
2008-08-05 9:01 ` David Woodhouse
2008-08-05 11:42 ` Jens Axboe
2008-08-05 13:32 ` David Woodhouse
2008-08-05 14:21 ` Ric Wheeler
2008-08-05 16:29 ` David Woodhouse [this message]
2008-08-05 17:25 ` David Woodhouse
2008-08-06 9:25 ` [PATCH 1/5] [BLOCK] Add 'discard' request handling David Woodhouse
2008-08-06 16:19 ` OGAWA Hirofumi
2008-08-06 18:18 ` David Woodhouse
2008-08-06 19:28 ` OGAWA Hirofumi
2008-08-07 16:32 ` [PATCH 1/5, v2] " David Woodhouse
2008-08-07 18:41 ` [PATCH 1/5] " Jens Axboe
2008-08-08 9:33 ` David Woodhouse
2008-08-08 10:30 ` Jens Axboe
2008-08-08 10:49 ` Jens Axboe
2008-08-08 11:04 ` David Woodhouse
2008-08-08 11:20 ` Jens Axboe
2008-08-08 10:52 ` David Woodhouse
2008-08-08 11:09 ` Jens Axboe
2008-08-08 11:18 ` Jens Axboe
2008-08-08 11:29 ` David Woodhouse
2008-08-08 11:44 ` Jens Axboe
2008-08-08 11:47 ` Jens Axboe
2008-08-08 12:05 ` David Woodhouse
2008-08-08 12:13 ` Jens Axboe
2008-08-08 12:32 ` David Woodhouse
2008-08-08 12:37 ` Jens Axboe
2008-08-08 12:49 ` David Woodhouse
2008-08-10 1:05 ` Jamie Lokier
2008-08-08 15:32 ` David Woodhouse
2008-08-08 14:22 ` Matthew Wilcox
2008-08-08 14:27 ` David Woodhouse
2008-08-08 14:34 ` Matthew Wilcox
2008-08-06 9:25 ` [PATCH 2/5] [FAT] Let the block device know when sectors can be discarded David Woodhouse
2008-08-06 16:40 ` OGAWA Hirofumi
2008-08-06 17:14 ` OGAWA Hirofumi
2008-08-06 18:11 ` David Woodhouse
2008-08-06 19:10 ` OGAWA Hirofumi
2008-08-06 19:50 ` OGAWA Hirofumi
2008-08-06 20:10 ` OGAWA Hirofumi
2008-08-06 21:37 ` David Woodhouse
2008-08-07 16:09 ` David Woodhouse
2008-08-07 16:33 ` [PATCH 2/5, v2] " David Woodhouse
2008-08-06 9:25 ` [PATCH 3/5] [MTD] Support 'discard sectors' operation in translation layer support core David Woodhouse
2008-08-06 9:25 ` [PATCH 4/5] [MTD] [FTL] Support 'discard sectors' operation David Woodhouse
2008-08-06 9:25 ` [PATCH 5/5] [BLOCK] Fix up comments about matching flags between bio and rq David Woodhouse
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1217953741.3454.784.camel@pmac.infradead.org \
--to=dwmw2@infradead.org \
--cc=akpm@linux-foundation.org \
--cc=gilad@codefidence.com \
--cc=jens.axboe@oracle.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=ricwheeler@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).