public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] PAGE_SIZE IO for RAW (RAW VARY)
@ 2002-01-09 17:41 Badari Pulavarty
  2002-01-09 17:58 ` Benjamin LaHaise
  2002-01-09 22:56 ` Alan Cox
  0 siblings, 2 replies; 32+ messages in thread
From: Badari Pulavarty @ 2002-01-09 17:41 UTC (permalink / raw)
  To: linux-kernel, marcelo, andrea; +Cc: pbadari

Hi,

Here is a 2.4.17 patch for doing PAGE_SIZE IO on raw devices. Instead
of using 512-byte buffer heads, the patch uses 4K buffer heads. This
patch significantly reduced CPU overhead and increased IO throughput
in our database benchmark runs (CPU went from 45% busy to 6% busy).

Could you please consider this for the 2.4.18 release? If you need the
patch against 2.4.18-preX, I can make one quickly.


NOTES:

1) wait_kio() no longer uses its "size" argument, but I have not removed
   the argument, to minimize the diffs.

2) I do not like adding a new routine, "submit_bh_blknr()", which is
   a one-line change from submit_bh(). Is there a better solution?
   I have a version of the patch which sets b_rsector to 0xffffffff
   in brw_kiovec() and checks for this in submit_bh(), but I don't
   really like that hack.

Thanks,
Badari

diff -Nur -X dontdiff linux/drivers/block/ll_rw_blk.c linux-2417vary/drivers/block/ll_rw_blk.c
--- linux/drivers/block/ll_rw_blk.c	Mon Oct 29 12:11:17 2001
+++ linux-2417vary/drivers/block/ll_rw_blk.c	Wed Jan  9 15:47:05 2002
@@ -915,6 +915,38 @@
 	}
 }
 
+/*
+ * submit_bh_blknr() - same as submit_bh() except that b_rsector is
+ * set to b_blocknr. Used for RAW VARY.
+ */
+void submit_bh_blknr(int rw, struct buffer_head * bh)
+{
+	int count = bh->b_size >> 9;
+
+	if (!test_bit(BH_Lock, &bh->b_state))
+		BUG();
+
+	set_bit(BH_Req, &bh->b_state);
+
+	/*
+	 * First step, 'identity mapping' - RAID or LVM might
+	 * further remap this.
+	 */
+	bh->b_rdev = bh->b_dev;
+	bh->b_rsector = bh->b_blocknr;
+
+	generic_make_request(rw, bh);
+
+	switch (rw) {
+		case WRITE:
+			kstat.pgpgout += count;
+			break;
+		default:
+			kstat.pgpgin += count;
+			break;
+	}
+}
+
 /**
  * ll_rw_block: low-level access to block devices
  * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
diff -Nur -X dontdiff linux/drivers/char/raw.c linux-2417vary/drivers/char/raw.c
--- linux/drivers/char/raw.c	Sat Sep 22 20:35:43 2001
+++ linux-2417vary/drivers/char/raw.c	Wed Jan  9 16:05:37 2002
@@ -350,8 +350,12 @@
 
 		for (i=0; i < blocks; i++) 
 			iobuf->blocks[i] = blocknr++;
+
+		iobuf->dovary = 1;
 		
 		err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size);
+
+		iobuf->dovary = 0;
 
 		if (rw == READ && err > 0)
 			mark_dirty_kiobuf(iobuf, err);
diff -Nur -X dontdiff linux/fs/buffer.c linux-2417vary/fs/buffer.c
--- linux/fs/buffer.c	Wed Jan  9 15:52:19 2002
+++ linux-2417vary/fs/buffer.c	Wed Jan  9 16:28:56 2002
@@ -2071,11 +2071,11 @@
 	err = 0;
 
 	for (i = nr; --i >= 0; ) {
-		iosize += size;
 		tmp = bh[i];
 		if (buffer_locked(tmp)) {
 			wait_on_buffer(tmp);
 		}
+		iosize += tmp->b_size;
 		
 		if (!buffer_uptodate(tmp)) {
 			/* We are traversing bh'es in reverse order so
@@ -2118,6 +2118,7 @@
 	struct kiobuf *	iobuf = NULL;
 	struct page *	map;
 	struct buffer_head *tmp, **bhs = NULL;
+	int		iosize = size;
 
 	if (!nr)
 		return 0;
@@ -2154,7 +2155,7 @@
 			}
 			
 			while (length > 0) {
-				blocknr = b[bufind++];
+				blocknr = b[bufind];
 				if (blocknr == -1UL) {
 					if (rw == READ) {
 						/* there was an hole in the filesystem */
@@ -2167,9 +2168,15 @@
 					} else
 						BUG();
 				}
+				if (iobuf->dovary) {
+					iosize = PAGE_SIZE - offset;
+					if (iosize > length)
+						iosize = length;
+				}
+				bufind += (iosize/size);
 				tmp = bhs[bhind++];
 
-				tmp->b_size = size;
+				tmp->b_size = iosize;
 				set_bh_page(tmp, map, offset);
 				tmp->b_this_page = tmp;
 
@@ -2185,7 +2192,10 @@
 					set_bit(BH_Uptodate, &tmp->b_state);
 
 				atomic_inc(&iobuf->io_count);
-				submit_bh(rw, tmp);
+				if (iobuf->dovary) 
+					submit_bh_blknr(rw, tmp);
+				else 
+					submit_bh(rw, tmp);
 				/* 
 				 * Wait for IO if we have got too much 
 				 */
@@ -2200,8 +2210,8 @@
 				}
 
 			skip_block:
-				length -= size;
-				offset += size;
+				length -= iosize;
+				offset += iosize;
 
 				if (offset >= PAGE_SIZE) {
 					offset = 0;
diff -Nur -X dontdiff linux/include/linux/iobuf.h linux-2417vary/include/linux/iobuf.h
--- linux/include/linux/iobuf.h	Thu Nov 22 11:46:26 2001
+++ linux-2417vary/include/linux/iobuf.h	Wed Jan  9 16:09:08 2002
@@ -44,7 +44,8 @@
 
 	struct page **	maplist;
 
-	unsigned int	locked : 1;	/* If set, pages has been locked */
+	unsigned int	locked : 1,	/* If set, pages has been locked */
+			dovary : 1;	/* If set, do PAGE_SIZE length IO */
 	
 	/* Always embed enough struct pages for atomic IO */
 	struct page *	map_array[KIO_STATIC_PAGES];

^ permalink raw reply	[flat|nested] 32+ messages in thread
[parent not found: <200201091741.g09HfAI17240@eng2.beaverton.ibm.com.suse.lists.linux.kernel>]
[parent not found: <200201102153.g0ALrl402482@eng2.beaverton.ibm.com>]

end of thread, other threads:[~2002-01-15  3:17 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-01-09 17:41 [PATCH] PAGE_SIZE IO for RAW (RAW VARY) Badari Pulavarty
2002-01-09 17:58 ` Benjamin LaHaise
2002-01-09 18:12   ` Badari Pulavarty
2002-01-09 18:21     ` Benjamin LaHaise
2002-01-09 19:28       ` Badari Pulavarty
2002-01-09 19:49         ` Benjamin LaHaise
2002-01-09 22:58         ` Alan Cox
2002-01-09 23:48           ` Badari Pulavarty
2002-01-10 10:34             ` Andrea Arcangeli
2002-01-10 10:29           ` Andrea Arcangeli
2002-01-10 10:18         ` Andrea Arcangeli
2002-01-10 10:22           ` Jens Axboe
2002-01-10 10:47             ` Andrea Arcangeli
2002-01-10 10:51               ` Jens Axboe
2002-01-10 11:09                 ` Andrea Arcangeli
2002-01-10 16:58                   ` Badari Pulavarty
2002-01-11 13:52                     ` Andrea Arcangeli
2002-01-10 19:24                   ` Badari Pulavarty
2002-01-10 20:00                     ` Alan Cox
2002-01-10 21:03                       ` Badari Pulavarty
2002-01-10 21:26                         ` Alan Cox
2002-01-10 21:15                       ` Badari Pulavarty
2002-01-15  3:16                         ` Joel Becker
2002-01-10  7:12       ` Jens Axboe
2002-01-09 18:13   ` Jens Axboe
2002-01-09 22:56 ` Alan Cox
2002-01-09 23:20   ` Badari Pulavarty
2002-01-10  0:23     ` Alan Cox
2002-01-10  4:06       ` GOTO Masanori
     [not found] <200201091741.g09HfAI17240@eng2.beaverton.ibm.com.suse.lists.linux.kernel>
     [not found] ` <20020109125845.B12609@redhat.com.suse.lists.linux.kernel>
2002-01-09 18:15   ` Andi Kleen
     [not found] <200201102153.g0ALrl402482@eng2.beaverton.ibm.com>
2002-01-10 22:11 ` Alan Cox
2002-01-10 22:20   ` Badari Pulavarty

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.