Direct I/O: add DIO_CLUSTER_LOCKING lock type (against linux-2.6.9-22.EL).

include/linux/fs.h
  - add DIO_CLUSTER_LOCKING (value 0) to the direct-I/O lock type enum;
    DIO_LOCKING keeps its explicit value 1, so the existing enumerators
    are not renumbered.
  - add blockdev_direct_IO_cluster_locking(), a wrapper that calls
    __blockdev_direct_IO() with DIO_CLUSTER_LOCKING.

fs/direct-io.c
  - get_more_blocks(): treat DIO_CLUSTER_LOCKING like DIO_LOCKING when
    deciding whether blocks may be created (create = 0 for blocks below
    i_size).
  - __blockdev_direct_IO(): jump over the lock acquisition block when
    DIO_CLUSTER_LOCKING is requested.

NOTE(review): the unlock/release paths elsewhere in fs/direct-io.c are not
part of these hunks; confirm they only act on DIO_LOCKING/DIO_OWN_LOCKING
so nothing is released that was never taken here.

Apply from the top of the kernel tree with: patch -p1

--- linux-2.6.9-22.EL/include/linux/fs.h	2005-12-07 12:43:55.000000000 -0500
+++ linux.truncate/include/linux/fs.h	2005-12-02 00:25:22.000000000 -0500
@@ -1509,7 +1509,8 @@ ssize_t __blockdev_direct_IO(int rw, str
 	int lock_type);
 
 enum {
+	DIO_CLUSTER_LOCKING = 0, /* (cluster) fs handles its own locking */
 	DIO_LOCKING = 1, /* need locking between buffered and direct access */
 	DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */
 	DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */
 };
@@ -1541,6 +1542,15 @@ static inline ssize_t blockdev_direct_IO
 				nr_segs, get_blocks, end_io, DIO_OWN_LOCKING);
 }
 
+static inline ssize_t blockdev_direct_IO_cluster_locking(int rw, struct kiocb *iocb,
+	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
+	loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
+	dio_iodone_t end_io)
+{
+	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+				nr_segs, get_blocks, end_io, DIO_CLUSTER_LOCKING);
+}
+
 extern struct file_operations generic_ro_fops;
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
--- linux-2.6.9-22.EL/fs/direct-io.c	2005-11-09 17:26:02.000000000 -0500
+++ linux.truncate/fs/direct-io.c	2005-12-07 12:27:17.000000000 -0500
@@ -515,7 +515,7 @@ static int get_more_blocks(struct dio *d
 			fs_count++;
 
 		create = dio->rw == WRITE;
-		if (dio->lock_type == DIO_LOCKING) {
+		if ((dio->lock_type == DIO_LOCKING) || (dio->lock_type == DIO_CLUSTER_LOCKING)) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
 							dio->blkbits))
 				create = 0;
@@ -1183,9 +1183,16 @@ __blockdev_direct_IO(int rw, struct kioc
 	 * For regular files using DIO_OWN_LOCKING,
 	 *	neither readers nor writers take any locks here
 	 *	(i_sem is already held and release for writers here)
+	 * For (cluster) filesystems using DIO_CLUSTER_LOCKING,
+	 *	the filesystem manages its own locking; the i_sem and
+	 *	i_alloc_sem handling in __blockdev_direct_IO() is bypassed
 	 */
+
 	dio->lock_type = dio_lock_type;
+	if (dio_lock_type == DIO_CLUSTER_LOCKING)
+		goto cluster_skip_locking;
+
 	if (dio_lock_type != DIO_NO_LOCKING) {
 		if (rw == READ) {
 			struct address_space *mapping;
 
@@ -1205,6 +1212,9 @@ __blockdev_direct_IO(int rw, struct kioc
 		if (dio_lock_type == DIO_LOCKING)
 			down_read(&inode->i_alloc_sem);
 	}
+
+cluster_skip_locking:
+
 	/*
 	 * For file extending writes updating i_size before data
 	 * writeouts complete can expose uninitialized blocks. So