All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] O_DIRECT with hardware blocksize alignment
@ 2002-01-09 19:56 Joel Becker
  2002-01-12 12:31 ` Andrea Arcangeli
  0 siblings, 1 reply; 12+ messages in thread
From: Joel Becker @ 2002-01-09 19:56 UTC (permalink / raw)
  To: Marcelo Tosatti, Andrea Arcangeli, lkml

Folks,
	Some major users of O_DIRECT (Oracle, for instance) align and
size I/O based on the 512-byte hardware blocksize common to most hard
disk drives.  The current O_DIRECT code enforces that the alignment and
size of the I/O match the software blocksize (inode->i_sb->s_blocksize).
This patch relaxes that restriction to a minimum of the hardware
blocksize.  In the interest of efficiency,
min(I/O alignment, s_blocksize) is used as the effective
blocksize.  e.g.:

I/O alignment	s_blocksize	final blocksize
8192		4096		4096
4096		4096		4096
512		4096		512

Joel


diff -uNr linux-2.4.17/fs/buffer.c linux-2.4.17-od/fs/buffer.c
--- linux-2.4.17/fs/buffer.c	Fri Dec 21 09:41:55 2001
+++ linux-2.4.17-od/fs/buffer.c	Wed Jan  9 10:55:52 2002
@@ -2003,6 +2003,17 @@
 {
 	int i, nr_blocks, retval;
 	unsigned long * blocks = iobuf->blocks;
+	int i_bscale, i_blocknr, i_blockoff;
+
+	/* Calculate I/O blocksize to sw blocksize scaling factor */
+	i_bscale = 1;
+	if (blocksize != inode->i_sb->s_blocksize)
+	{
+		if ((inode->i_sb->s_blocksize < blocksize) ||
+		    ((inode->i_sb->s_blocksize % blocksize) != 0))
+			BUG();
+		i_bscale = inode->i_sb->s_blocksize / blocksize;
+	}
 
 	nr_blocks = iobuf->length / blocksize;
 	/* build the blocklist */
@@ -2013,7 +2024,13 @@
 		bh.b_dev = inode->i_dev;
 		bh.b_size = blocksize;
 
-		retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
+		/* Convert blocknr to the software blocksize */
+		if (!i_bscale)
+			BUG();
+		i_blocknr = blocknr / i_bscale;
+		i_blockoff = blocknr % i_bscale;
+
+		retval = get_block(inode, i_blocknr, &bh, rw == READ ? 0 : 1);
 		if (retval)
 			goto out;
 
@@ -2031,7 +2048,12 @@
 			if (!buffer_mapped(&bh))
 				BUG();
 		}
-		blocks[i] = bh.b_blocknr;
+
+		/*
+		 * Convert the returned blocknr back to the
+		 * I/O blocksize.
+		 */
+		blocks[i] = (bh.b_blocknr * i_bscale) + i_blockoff;
 	}
 
 	retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
diff -uNr linux-2.4.17/mm/filemap.c linux-2.4.17-od/mm/filemap.c
--- linux-2.4.17/mm/filemap.c	Fri Dec 21 09:42:04 2001
+++ linux-2.4.17-od/mm/filemap.c	Wed Jan  9 10:58:13 2002
@@ -1491,6 +1491,7 @@
 {
 	ssize_t retval;
 	int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
+	int b_bsize, b_bmask;
 	struct kiobuf * iobuf;
 	struct address_space * mapping = filp->f_dentry->d_inode->i_mapping;
 	struct inode * inode = mapping->host;
@@ -1508,14 +1509,36 @@
 		new_iobuf = 1;
 	}
 
+	chunk_size = KIO_MAX_ATOMIC_IO << 10;
+
+	retval = -EINVAL;
+
+	/*
+	 * Starting at the software blocksize, check size
+	 * and alignment of the I/O.  Shift the blocksize
+	 * down until we get an alignment that works, or we hit
+	 * the hardware blocksize and fail.
+	 */
+	b_bsize = get_hardsect_size(inode->i_dev);
+	b_bmask = (b_bsize - 1);
+
 	blocksize = 1 << inode->i_blkbits;
 	blocksize_bits = inode->i_blkbits;
 	blocksize_mask = blocksize - 1;
-	chunk_size = KIO_MAX_ATOMIC_IO << 10;
+	while (blocksize >= b_bsize)
+	{
+		if (! ((offset & blocksize_mask) ||
+		       (count & blocksize_mask) ||
+		       ((unsigned long)buf & blocksize_mask)))
+			break;
 
-	retval = -EINVAL;
-	if ((offset & blocksize_mask) || (count & blocksize_mask))
+		blocksize >>= 1;
+		blocksize_mask = (blocksize - 1);
+		blocksize_bits--;
+	}
+	if (blocksize < b_bsize)
 		goto out_free;
+
 	if (!mapping->a_ops->direct_IO)
 		goto out_free;
 
-- 

"Every new beginning comes from some other beginning's end."

			http://www.jlbec.org/
			jlbec@evilplan.org

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2002-01-28  2:06 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-01-09 19:56 [PATCH] O_DIRECT with hardware blocksize alignment Joel Becker
2002-01-12 12:31 ` Andrea Arcangeli
2002-01-15  3:21   ` Joel Becker
2002-01-15 12:20     ` Andrea Arcangeli
2002-01-15 13:08       ` Jens Axboe
2002-01-15 13:55         ` Jens Axboe
2002-01-15 21:23           ` Badari Pulavarty
2002-01-15 21:44             ` Alan Cox
2002-01-24  0:44           ` [PATCH] small bugfix for ll_rw_bio() for 2.5.3-pre3 Badari Pulavarty
2002-01-24 21:52           ` O_DIRECT broken in 2.5.3-preX ? Badari Pulavarty
2002-01-28  2:06             ` Andrea Arcangeli
2002-01-16  0:08       ` [PATCH] O_DIRECT with hardware blocksize alignment Joel Becker

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.