linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: jaxboe@fusionio.com
Cc: hughd@google.com, hirofumi@mail.parknet.co.jp, tytso@mit.edu,
	chris.mason@oracle.com, swhiteho@redhat.com,
	martin.petersen@oracle.com, linux-fsdevel@vger.kernel.org
Subject: Re: discard and barriers
Date: Mon, 23 Aug 2010 18:42:04 +0200	[thread overview]
Message-ID: <20100823164204.GB31107@lst.de> (raw)
In-Reply-To: <20100814115625.GA15902@lst.de>

Jens, do you think the following patch is still okay for 3.6.36?
It fixes the massive performance regression due to the full barriers
for discard and also makes the barrier removal in 2.6.37 a lot simpler.

---

From: Christoph Hellwig <hch@lst.de>
Subject: [PATCH] block: do not send discards as barriers

There is no reason to send discards as barriers.  The rationale for that
is easy:  The current barriers do two things, flushing caches and provide
ordering.  There is no reason for a cache flush before a discard because
the discard doesn't care for ordering vs writes to other regions than
the one it discards, and there is no reason for a cache flush afterwards
as a discard doesn't store data.  The ordering semantics aren't used
currently because all discards are done synchronously and thus we
order explicitly.

Even more until very late in the 2.6.35 cycle we didn't send DISCARD_BARRIER
requests as real hardbarrier but as an elevator only barrier which doesn't
provide the above semantics, and the switch to real barriers causes masssive
performance regressions especially for the swap code.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: linux-2.6/block/blk-lib.c
===================================================================
--- linux-2.6.orig/block/blk-lib.c	2010-08-23 15:16:37.656033352 +0200
+++ linux-2.6/block/blk-lib.c	2010-08-23 15:16:45.913012470 +0200
@@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_de
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
-	int type = flags & BLKDEV_IFL_BARRIER ?
-		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int max_discard_sectors;
 	struct bio *bio;
 	int ret = 0;
@@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_de
 	if (flags & BLKDEV_IFL_SECURE) {
 		if (!blk_queue_secdiscard(q))
 			return -EOPNOTSUPP;
-		type |= DISCARD_SECURE;
+		type |= REQ_SECURE;
 	}
 
 	while (nr_sects && !ret) {
@@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_de
 	bb.wait = &wait;
 	bb.end_io = NULL;
 
-	if (flags & BLKDEV_IFL_BARRIER) {
-		/* issue async barrier before the data */
-		ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
-		if (ret)
-			return ret;
-	}
 submit:
 	ret = 0;
 	while (nr_sects != 0) {
@@ -199,13 +192,6 @@ submit:
 		issued++;
 		submit_bio(WRITE, bio);
 	}
-	/*
-	 * When all data bios are in flight. Send final barrier if requeted.
-	 */
-	if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
-		ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
-					flags & BLKDEV_IFL_WAIT);
-
 
 	if (flags & BLKDEV_IFL_WAIT)
 		/* Wait for bios in-flight */
Index: linux-2.6/fs/btrfs/extent-tree.c
===================================================================
--- linux-2.6.orig/fs/btrfs/extent-tree.c	2010-08-23 15:16:37.677004089 +0200
+++ linux-2.6/fs/btrfs/extent-tree.c	2010-08-23 15:16:45.918004368 +0200
@@ -1696,7 +1696,7 @@ static void btrfs_issue_discard(struct b
 				u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
-			BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+			BLKDEV_IFL_WAIT);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
Index: linux-2.6/fs/gfs2/rgrp.c
===================================================================
--- linux-2.6.orig/fs/gfs2/rgrp.c	2010-08-23 15:16:37.684004298 +0200
+++ linux-2.6/fs/gfs2/rgrp.c	2010-08-23 15:16:45.927004298 +0200
@@ -854,8 +854,7 @@ static void gfs2_rgrp_send_discards(stru
 				if ((start + nr_sects) != blk) {
 					rv = blkdev_issue_discard(bdev, start,
 							    nr_sects, GFP_NOFS,
-							    BLKDEV_IFL_WAIT |
-							    BLKDEV_IFL_BARRIER);
+							    BLKDEV_IFL_WAIT);
 					if (rv)
 						goto fail;
 					nr_sects = 0;
@@ -870,7 +869,7 @@ start_new_extent:
 	}
 	if (nr_sects) {
 		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
-					 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+					 BLKDEV_IFL_WAIT);
 		if (rv)
 			goto fail;
 	}
Index: linux-2.6/fs/nilfs2/the_nilfs.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/the_nilfs.c	2010-08-23 15:16:37.692003949 +0200
+++ linux-2.6/fs/nilfs2/the_nilfs.c	2010-08-23 15:16:45.928014774 +0200
@@ -775,8 +775,7 @@ int nilfs_discard_segments(struct the_ni
 						   start * sects_per_block,
 						   nblocks * sects_per_block,
 						   GFP_NOFS,
-						   BLKDEV_IFL_WAIT |
-						   BLKDEV_IFL_BARRIER);
+						   BLKDEV_IFL_WAIT);
 			if (ret < 0)
 				return ret;
 			nblocks = 0;
@@ -787,7 +786,7 @@ int nilfs_discard_segments(struct the_ni
 					   start * sects_per_block,
 					   nblocks * sects_per_block,
 					   GFP_NOFS,
-					  BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+					  BLKDEV_IFL_WAIT);
 	return ret;
 }
 
Index: linux-2.6/include/linux/blkdev.h
===================================================================
--- linux-2.6.orig/include/linux/blkdev.h	2010-08-23 15:16:37.700013168 +0200
+++ linux-2.6/include/linux/blkdev.h	2010-08-23 15:16:45.931003739 +0200
@@ -921,11 +921,9 @@ static inline struct request *blk_map_qu
 }
 enum{
 	BLKDEV_WAIT,	/* wait for completion */
-	BLKDEV_BARRIER,	/* issue request with barrier */
 	BLKDEV_SECURE,	/* secure discard */
 };
 #define BLKDEV_IFL_WAIT		(1 << BLKDEV_WAIT)
-#define BLKDEV_IFL_BARRIER	(1 << BLKDEV_BARRIER)
 #define BLKDEV_IFL_SECURE	(1 << BLKDEV_SECURE)
 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
 			unsigned long);
@@ -939,7 +937,7 @@ static inline int sb_issue_discard(struc
 	block <<= (sb->s_blocksize_bits - 9);
 	nr_blocks <<= (sb->s_blocksize_bits - 9);
 	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
-				   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				    BLKDEV_IFL_WAIT);
 }
 
 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2010-08-23 15:16:37.709012819 +0200
+++ linux-2.6/include/linux/fs.h	2010-08-23 15:16:45.938005835 +0200
@@ -159,14 +159,6 @@ struct inodes_stat_t {
 #define WRITE_BARRIER		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
 				 REQ_HARDBARRIER)
 
-/*
- * These aren't really reads or writes, they pass down information about
- * parts of device that are now unused by the file system.
- */
-#define DISCARD_NOBARRIER	(WRITE | REQ_DISCARD)
-#define DISCARD_BARRIER		(WRITE | REQ_DISCARD | REQ_HARDBARRIER)
-#define DISCARD_SECURE		(DISCARD_NOBARRIER | REQ_SECURE)
-
 #define SEL_IN		1
 #define SEL_OUT		2
 #define SEL_EX		4
Index: linux-2.6/mm/swapfile.c
===================================================================
--- linux-2.6.orig/mm/swapfile.c	2010-08-23 15:16:37.724015124 +0200
+++ linux-2.6/mm/swapfile.c	2010-08-23 15:16:45.948004996 +0200
@@ -142,7 +142,7 @@ static int discard_swap(struct swap_info
 	if (nr_blocks) {
 		err = blkdev_issue_discard(si->bdev, start_block,
 				nr_blocks, GFP_KERNEL,
-				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				BLKDEV_IFL_WAIT);
 		if (err)
 			return err;
 		cond_resched();
@@ -154,7 +154,7 @@ static int discard_swap(struct swap_info
 
 		err = blkdev_issue_discard(si->bdev, start_block,
 				nr_blocks, GFP_KERNEL,
-				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				BLKDEV_IFL_WAIT);
 		if (err)
 			break;
 
@@ -193,8 +193,7 @@ static void discard_swap_cluster(struct
 			start_block <<= PAGE_SHIFT - 9;
 			nr_blocks <<= PAGE_SHIFT - 9;
 			if (blkdev_issue_discard(si->bdev, start_block,
-				    nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT |
-							BLKDEV_IFL_BARRIER))
+				    nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT))
 				break;
 		}
 

      parent reply	other threads:[~2010-08-23 16:42 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-14 11:56 discard and barriers Christoph Hellwig
2010-08-14 14:14 ` Ted Ts'o
2010-08-14 14:52   ` Christoph Hellwig
2010-08-14 15:46     ` Chris Mason
2010-08-14 17:22       ` Christoph Hellwig
2010-08-14 20:11       ` Hugh Dickins
2010-08-15 17:39     ` Ted Ts'o
2010-08-15 19:02       ` Christoph Hellwig
2010-08-15 21:25         ` Ted Ts'o
2010-08-15 21:30           ` Christoph Hellwig
2010-08-16  9:41     ` Steven Whitehouse
2010-08-16 11:26       ` Christoph Hellwig
2010-08-17 10:59         ` Steven Whitehouse
2010-08-23 16:42 ` Christoph Hellwig [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100823164204.GB31107@lst.de \
    --to=hch@lst.de \
    --cc=chris.mason@oracle.com \
    --cc=hirofumi@mail.parknet.co.jp \
    --cc=hughd@google.com \
    --cc=jaxboe@fusionio.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=swhiteho@redhat.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).