linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* What happened to TRIM support for raid linear/0/1/10?
@ 2012-08-08 13:10 Holger Kiehl
  2012-08-08 17:58 ` Lutz Vieweg
  2012-08-09  3:24 ` NeilBrown
  0 siblings, 2 replies; 4+ messages in thread
From: Holger Kiehl @ 2012-08-08 13:10 UTC (permalink / raw)
  To: Shaohua Li, neilb@suse.de
  Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org

[-- Attachment #1: Type: TEXT/PLAIN, Size: 681 bytes --]

Hello,

I have been using the patches posted by Shaohua Li on 16th March 2012:

    http://lkml.indiana.edu/hypermail/linux/kernel/1203.2/00048.html

for several months on a very busy file server (serving 9 million files
with 5.3 TiB daily) without any problems.

Is there any chance that these patches will go into the official kernel?
Or what is the reason that these patches are not applied?

I have attached the patch set in one big patch for 3.5. Please do not
use it since I am not sure if it is correct. Shaohua, could you please
check whether it is correct and maybe post a new one?

Personally, I would think that TRIM support in MD would be a very good thing.

Regards,
Holger

[-- Attachment #2: Type: TEXT/PLAIN, Size: 11224 bytes --]

diff -u --recursive --new-file linux-3.5.orig/drivers/md/linear.c linux-3.5/drivers/md/linear.c
--- linux-3.5.orig/drivers/md/linear.c	2012-07-21 20:58:29.000000000 +0000
+++ linux-3.5/drivers/md/linear.c	2012-07-27 06:53:39.507121434 +0000
@@ -138,6 +138,7 @@
 	struct linear_conf *conf;
 	struct md_rdev *rdev;
 	int i, cnt;
+	bool discard_supported = false;
 
 	conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
 			GFP_KERNEL);
@@ -171,6 +172,8 @@
 		conf->array_sectors += rdev->sectors;
 		cnt++;
 
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
 	}
 	if (cnt != raid_disks) {
 		printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
@@ -178,6 +181,11 @@
 		goto out;
 	}
 
+	if (!discard_supported)
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	/*
 	 * Here we calculate the device offsets.
 	 */
@@ -326,6 +334,14 @@
 	bio->bi_sector = bio->bi_sector - start_sector
 		+ tmp_dev->rdev->data_offset;
 	rcu_read_unlock();
+
+	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+		!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
+		/* Just ignore it */
+		bio_endio(bio, 0);
+		return;
+	}
+
 	generic_make_request(bio);
 }
 
diff -u --recursive --new-file linux-3.5.orig/drivers/md/raid0.c linux-3.5/drivers/md/raid0.c
--- linux-3.5.orig/drivers/md/raid0.c	2012-07-21 20:58:29.000000000 +0000
+++ linux-3.5/drivers/md/raid0.c	2012-07-27 06:53:39.507121434 +0000
@@ -88,6 +88,7 @@
 	char b[BDEVNAME_SIZE];
 	char b2[BDEVNAME_SIZE];
 	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
+	bool discard_supported = false;
 
 	if (!conf)
 		return -ENOMEM;
@@ -195,6 +196,9 @@
 		if (!smallest || (rdev1->sectors < smallest->sectors))
 			smallest = rdev1;
 		cnt++;
+
+		if (blk_queue_discard(bdev_get_queue(rdev1->bdev)))
+			discard_supported = true;
 	}
 	if (cnt != mddev->raid_disks) {
 		printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
@@ -272,6 +276,11 @@
 	blk_queue_io_opt(mddev->queue,
 			 (mddev->chunk_sectors << 9) * mddev->raid_disks);
 
+	if (!discard_supported)
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
 	*private_conf = conf;
 
@@ -422,6 +431,7 @@
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
 	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
+	blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
 
 	/* if private is not null, we are here after takeover */
 	if (mddev->private == NULL) {
@@ -509,7 +519,7 @@
 		sector_t sector = bio->bi_sector;
 		struct bio_pair *bp;
 		/* Sanity check -- queue functions should prevent this happening */
-		if (bio->bi_vcnt != 1 ||
+		if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
 		    bio->bi_idx != 0)
 			goto bad_map;
 		/* This is a one page bio that upper layers
@@ -535,6 +545,13 @@
 	bio->bi_sector = sector_offset + zone->dev_start +
 		tmp_dev->data_offset;
 
+	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+		!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
+		/* Just ignore it */
+		bio_endio(bio, 0);
+		return;
+	}
+
 	generic_make_request(bio);
 	return;
 
diff -u --recursive --new-file linux-3.5.orig/drivers/md/raid10.c linux-3.5/drivers/md/raid10.c
--- linux-3.5.orig/drivers/md/raid10.c	2012-07-21 20:58:29.000000000 +0000
+++ linux-3.5/drivers/md/raid10.c	2012-07-27 06:53:39.507121435 +0000
@@ -887,7 +887,12 @@
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
 			bio->bi_next = NULL;
-			generic_make_request(bio);
+			if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+			    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+				/* Just ignore it */
+				bio_endio(bio, 0);
+			else
+				generic_make_request(bio);
 			bio = next;
 		}
 	} else
@@ -1017,7 +1022,7 @@
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
-				   struct md_rdev *rdev)
+				struct md_rdev *rdev)
 {
 	if (!test_bit(MD_RECOVERY_RESHAPE, &rdev->mddev->recovery) ||
 	    test_bit(R10BIO_Previous, &r10_bio->state))
@@ -1037,6 +1042,7 @@
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
+	const unsigned long do_discard = (bio->bi_rw & (REQ_DISCARD | REQ_SECURE));
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
 	int sectors_handled;
@@ -1057,7 +1063,7 @@
 			 || conf->prev.near_copies < conf->prev.raid_disks))) {
 		struct bio_pair *bp;
 		/* Sanity check -- queue functions should prevent this happening */
-		if (bio->bi_vcnt != 1 ||
+		if ((bio->bi_vcnt != 1 && bio->bi_vcnt !=0) ||
 		    bio->bi_idx != 0)
 			goto bad_map;
 		/* This is a one page bio that upper layers
@@ -1386,7 +1392,7 @@
 						      conf->mirrors[d].rdev));
 		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua;
+		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
@@ -1415,7 +1421,7 @@
 					   conf->mirrors[d].replacement));
 		mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua;
+		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
@@ -1699,6 +1705,9 @@
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
+	if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	print_conf(conf);
 	return err;
 }
@@ -3457,6 +3466,7 @@
 	sector_t size;
 	sector_t min_offset_diff = 0;
 	int first = 1;
+	bool discard_supported = false;
 
 	if (mddev->private == NULL) {
 		conf = setup_conf(mddev);
@@ -3471,6 +3481,7 @@
 	mddev->thread = conf->thread;
 	conf->thread = NULL;
 
+	blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
 	chunk_size = mddev->chunk_sectors << 9;
 	blk_queue_io_min(mddev->queue, chunk_size);
 	if (conf->geo.raid_disks % conf->geo.near_copies)
@@ -3515,8 +3526,16 @@
 				  rdev->data_offset << 9);
 
 		disk->head_position = 0;
+
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
 	}
 
+	if (discard_supported)
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf, -1)) {
 		printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
diff -u --recursive --new-file linux-3.5.orig/drivers/md/raid1.c linux-3.5/drivers/md/raid1.c
--- linux-3.5.orig/drivers/md/raid1.c	2012-07-21 20:58:29.000000000 +0000
+++ linux-3.5/drivers/md/raid1.c	2012-07-27 06:53:39.507121435 +0000
@@ -707,7 +707,12 @@
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
 			bio->bi_next = NULL;
-			generic_make_request(bio);
+			if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+			    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+				/* Just ignore it */
+				bio_endio(bio, 0);
+			else
+				generic_make_request(bio);
 			bio = next;
 		}
 	} else
@@ -882,6 +887,7 @@
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
+	const unsigned long do_discard = (bio->bi_rw & (REQ_DISCARD | REQ_SECURE));
 	struct md_rdev *blocked_rdev;
 	int first_clone;
 	int sectors_handled;
@@ -1181,7 +1187,7 @@
 				   conf->mirrors[i].rdev->data_offset);
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io	= raid1_end_write_request;
-		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
+		mbio->bi_rw = WRITE | do_flush_fua | do_sync | do_discard;
 		mbio->bi_private = r1_bio;
 
 		atomic_inc(&r1_bio->remaining);
@@ -1424,6 +1430,8 @@
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
+	if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 	print_conf(conf);
 	return err;
 }
@@ -2654,6 +2662,7 @@
 	int i;
 	struct md_rdev *rdev;
 	int ret;
+	bool discard_supported = false;
 
 	if (mddev->level != 1) {
 		printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -2683,8 +2692,16 @@
 			continue;
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
+
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
 	}
 
+	if (discard_supported)
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+
 	mddev->degraded = 0;
 	for (i=0; i < conf->raid_disks; i++)
 		if (conf->mirrors[i].rdev == NULL ||
diff -u --recursive --new-file linux-3.5.orig/fs/bio.c linux-3.5/fs/bio.c
--- linux-3.5.orig/fs/bio.c	2012-07-21 20:58:29.000000000 +0000
+++ linux-3.5/fs/bio.c	2012-07-27 06:53:39.507121435 +0000
@@ -1500,7 +1500,7 @@
 	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
 				bi->bi_sector + first_sectors);
 
-	BUG_ON(bi->bi_vcnt != 1);
+	BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
 	BUG_ON(bi->bi_idx != 0);
 	atomic_set(&bp->cnt, 3);
 	bp->error = 0;
@@ -1510,17 +1510,19 @@
 	bp->bio2.bi_size -= first_sectors << 9;
 	bp->bio1.bi_size = first_sectors << 9;
 
-	bp->bv1 = bi->bi_io_vec[0];
-	bp->bv2 = bi->bi_io_vec[0];
-	bp->bv2.bv_offset += first_sectors << 9;
-	bp->bv2.bv_len -= first_sectors << 9;
-	bp->bv1.bv_len = first_sectors << 9;
+	if (bi->bi_vcnt != 0) {
+		bp->bv1 = bi->bi_io_vec[0];
+		bp->bv2 = bi->bi_io_vec[0];
+		bp->bv2.bv_offset += first_sectors << 9;
+		bp->bv2.bv_len -= first_sectors << 9;
+		bp->bv1.bv_len = first_sectors << 9;
 
-	bp->bio1.bi_io_vec = &bp->bv1;
-	bp->bio2.bi_io_vec = &bp->bv2;
+		bp->bio1.bi_io_vec = &bp->bv1;
+		bp->bio2.bi_io_vec = &bp->bv2;
 
-	bp->bio1.bi_max_vecs = 1;
-	bp->bio2.bi_max_vecs = 1;
+		bp->bio1.bi_max_vecs = 1;
+		bp->bio2.bi_max_vecs = 1;
+	}
 
 	bp->bio1.bi_end_io = bio_pair_end_1;
 	bp->bio2.bi_end_io = bio_pair_end_2;
diff -u --recursive --new-file linux-3.5.orig/include/linux/blkdev.h linux-3.5/include/linux/blkdev.h
--- linux-3.5.orig/include/linux/blkdev.h	2012-07-21 20:58:29.000000000 +0000
+++ linux-3.5/include/linux/blkdev.h	2012-07-27 06:53:39.507121435 +0000
@@ -590,7 +590,7 @@
  * it already be started by driver.
  */
 #define RQ_NOMERGE_FLAGS	\
-	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
+	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
 	 (((rq)->cmd_flags & REQ_DISCARD) || \

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2012-08-09  5:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-08 13:10 What happened to TRIM support for raid linear/0/1/10? Holger Kiehl
2012-08-08 17:58 ` Lutz Vieweg
2012-08-09  3:24 ` NeilBrown
2012-08-09  5:14   ` Shaohua Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).