* [PATCH v7 4/6] block: loop: prepare for supporing direct IO
[not found] <1437061068-26118-1-git-send-email-ming.lei@canonical.com>
@ 2015-07-16 15:37 ` Ming Lei
[not found] ` <1437061068-26118-5-git-send-email-ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-07-16 15:37 ` [PATCH v7 5/6] block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO Ming Lei
1 sibling, 1 reply; 13+ messages in thread
From: Ming Lei @ 2015-07-16 15:37 UTC (permalink / raw)
To: Jens Axboe, linux-kernel
Cc: Justin M. Forbes, Jeff Moyer, Tejun Heo, Christoph Hellwig,
Ming Lei, linux-api
This patches provides one interface for enabling direct IO
from user space:
- userspace(such as losetup) can pass 'file' which is
opened/fcntl as O_DIRECT
Also __loop_update_dio() is introduced to check if direct I/O
can be used on current loop setting.
The last big change is to introduce LO_FLAGS_DIRECT_IO flag
for userspace to know if direct IO is used to access backing
file.
Cc: linux-api@vger.kernel.org
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
drivers/block/loop.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++-
drivers/block/loop.h | 1 +
include/uapi/linux/loop.h | 1 +
3 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ce94b92..35aa3dd 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -164,6 +164,49 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
return get_size(lo->lo_offset, lo->lo_sizelimit, file);
}
+static void __loop_update_dio(struct loop_device *lo, bool dio)
+{
+ struct file *file = lo->lo_backing_file;
+ struct inode *inode = file->f_mapping->host;
+ bool use_dio;
+
+ /*
+ * loop block's logical block size is 512, now
+ * we support direct I/O only if the backing
+ * block devices' minimize I/O size is 512 and
+ * the offset is aligned with 512.
+ */
+ if (dio) {
+ if (inode->i_sb->s_bdev &&
+ bdev_io_min(inode->i_sb->s_bdev) == 512 &&
+ !(lo->lo_offset & 511))
+ use_dio = true;
+ else
+ use_dio = false;
+ } else {
+ use_dio = false;
+ }
+
+ if (lo->use_dio == use_dio)
+ return;
+
+ /* flush dirty pages before changing direct IO */
+ vfs_fsync(file, 0);
+
+ /*
+ * The flag of LO_FLAGS_DIRECT_IO is handled similarly with
+ * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup
+ * will get updated by ioctl(LOOP_GET_STATUS)
+ */
+ blk_mq_freeze_queue(lo->lo_queue);
+ lo->use_dio = use_dio;
+ if (use_dio)
+ lo->lo_flags |= LO_FLAGS_DIRECT_IO;
+ else
+ lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
+ blk_mq_unfreeze_queue(lo->lo_queue);
+}
+
static int
figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
@@ -173,8 +216,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
if (unlikely((loff_t)x != size))
return -EFBIG;
- if (lo->lo_offset != offset)
+ if (lo->lo_offset != offset) {
lo->lo_offset = offset;
+
+ /* update dio if lo_offset is changed*/
+ __loop_update_dio(lo, lo->use_dio);
+ }
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
set_capacity(lo->lo_disk, x);
@@ -421,6 +468,11 @@ struct switch_request {
struct completion wait;
};
+static inline void loop_update_dio(struct loop_device *lo)
+{
+ __loop_update_dio(lo, io_is_direct(lo->lo_backing_file));
+}
+
/*
* Do the actual switch; called from the BIO completion routine
*/
@@ -441,6 +493,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_update_dio(lo);
}
/*
@@ -627,11 +680,19 @@ static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
return sprintf(buf, "%s\n", partscan ? "1" : "0");
}
+static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
+{
+ int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
+
+ return sprintf(buf, "%s\n", dio ? "1" : "0");
+}
+
LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);
LOOP_ATTR_RO(partscan);
+LOOP_ATTR_RO(dio);
static struct attribute *loop_attrs[] = {
&loop_attr_backing_file.attr,
@@ -639,6 +700,7 @@ static struct attribute *loop_attrs[] = {
&loop_attr_sizelimit.attr,
&loop_attr_autoclear.attr,
&loop_attr_partscan.attr,
+ &loop_attr_dio.attr,
NULL,
};
@@ -783,6 +845,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_flush(lo->lo_queue, REQ_FLUSH);
+ loop_update_dio(lo);
set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index b6c7d21..d1de221 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -58,6 +58,7 @@ struct loop_device {
struct mutex lo_ctl_mutex;
struct kthread_worker worker;
struct task_struct *worker_task;
+ bool use_dio;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index e0cecd2..949851c 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -21,6 +21,7 @@ enum {
LO_FLAGS_READ_ONLY = 1,
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
+ LO_FLAGS_DIRECT_IO = 16,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v7 5/6] block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO
[not found] <1437061068-26118-1-git-send-email-ming.lei@canonical.com>
2015-07-16 15:37 ` [PATCH v7 4/6] block: loop: prepare for supporing direct IO Ming Lei
@ 2015-07-16 15:37 ` Ming Lei
1 sibling, 0 replies; 13+ messages in thread
From: Ming Lei @ 2015-07-16 15:37 UTC (permalink / raw)
To: Jens Axboe, linux-kernel
Cc: Justin M. Forbes, Jeff Moyer, Tejun Heo, Christoph Hellwig,
Ming Lei, linux-api
If loop block is mounted via 'mount -o loop', it isn't easy
to pass file descriptor opened as O_DIRECT, so this patch
introduces a new command to support direct IO for this case.
Cc: linux-api@vger.kernel.org
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
drivers/block/loop.c | 19 +++++++++++++++++++
include/uapi/linux/loop.h | 1 +
2 files changed, 20 insertions(+)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 35aa3dd..3f26a0a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1214,6 +1214,20 @@ static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}
+static int loop_set_dio(struct loop_device *lo, unsigned long arg)
+{
+ int error = -ENXIO;
+ if (lo->lo_state != Lo_bound)
+ goto out;
+
+ __loop_update_dio(lo, !!arg);
+ if (lo->use_dio == !!arg)
+ return 0;
+ error = -EINVAL;
+ out:
+ return error;
+}
+
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -1257,6 +1271,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_capacity(lo, bdev);
break;
+ case LOOP_SET_DIRECT_IO:
+ err = -EPERM;
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ err = loop_set_dio(lo, arg);
+ break;
default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 949851c..c8125ec 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -87,6 +87,7 @@ struct loop_info64 {
#define LOOP_GET_STATUS64 0x4C05
#define LOOP_CHANGE_FD 0x4C06
#define LOOP_SET_CAPACITY 0x4C07
+#define LOOP_SET_DIRECT_IO 0x4C08
/* /dev/loop-control interface */
#define LOOP_CTL_ADD 0x4C80
--
1.9.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
[not found] ` <1437061068-26118-5-git-send-email-ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
@ 2015-07-27 8:40 ` Christoph Hellwig
[not found] ` <20150727084020.GA28336-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2015-07-27 8:40 UTC (permalink / raw)
To: Ming Lei
Cc: Jens Axboe, linux-kernel-u79uwXL29TY76Z2rM5mHXA, Justin M. Forbes,
Jeff Moyer, Tejun Heo, Christoph Hellwig,
linux-api-u79uwXL29TY76Z2rM5mHXA
> + /*
> + * loop block's logical block size is 512, now
> + * we support direct I/O only if the backing
> + * block devices' minimize I/O size is 512 and
> + * the offset is aligned with 512.
> + */
> + if (dio) {
> + if (inode->i_sb->s_bdev &&
> + bdev_io_min(inode->i_sb->s_bdev) == 512 &&
> + !(lo->lo_offset & 511))
Why the hardcoded value? I suspect this should be more like:
if (dio && inode->i_sb->s_bdev &&
(lo->lo_offset & (bdev_io_min(inode->i_sb->s_bdev) - 1)) != 0)
dio = false;
> + blk_mq_freeze_queue(lo->lo_queue);
> + lo->use_dio = use_dio;
> + if (use_dio)
> + lo->lo_flags |= LO_FLAGS_DIRECT_IO;
> + else
> + lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
> + blk_mq_unfreeze_queue(lo->lo_queue);
Locking?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
[not found] ` <20150727084020.GA28336-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
@ 2015-07-27 9:41 ` Ming Lei
2015-07-27 9:45 ` Christoph Hellwig
0 siblings, 1 reply; 13+ messages in thread
From: Ming Lei @ 2015-07-27 9:41 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jens Axboe, Linux Kernel Mailing List, Justin M. Forbes,
Jeff Moyer, Tejun Heo, linux-api-u79uwXL29TY76Z2rM5mHXA
On Mon, Jul 27, 2015 at 4:40 AM, Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org> wrote:
>> + /*
>> + * loop block's logical block size is 512, now
>> + * we support direct I/O only if the backing
>> + * block devices' minimize I/O size is 512 and
>> + * the offset is aligned with 512.
>> + */
>> + if (dio) {
>> + if (inode->i_sb->s_bdev &&
>> + bdev_io_min(inode->i_sb->s_bdev) == 512 &&
>> + !(lo->lo_offset & 511))
>
> Why the hardcoded value? I suspect this should be more like:
>
> if (dio && inode->i_sb->s_bdev &&
> (lo->lo_offset & (bdev_io_min(inode->i_sb->s_bdev) - 1)) != 0)
> dio = false;
The above can't work if the backing device has a bigger sector size
(such as 4K), that is why loop's direct-io requires 512 min_io_size of
backing device.
>
>> + blk_mq_freeze_queue(lo->lo_queue);
>> + lo->use_dio = use_dio;
>> + if (use_dio)
>> + lo->lo_flags |= LO_FLAGS_DIRECT_IO;
>> + else
>> + lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
>> + blk_mq_unfreeze_queue(lo->lo_queue);
>
> Locking?
__loop_update_dio() is only called inside ioctl path, so
mutex of lo->lo_ctl_mutex has been held already.
Thanks,
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
2015-07-27 9:41 ` Ming Lei
@ 2015-07-27 9:45 ` Christoph Hellwig
[not found] ` <20150727094530.GA15507-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2015-07-27 9:45 UTC (permalink / raw)
To: Ming Lei
Cc: Christoph Hellwig, Jens Axboe, Linux Kernel Mailing List,
Justin M. Forbes, Jeff Moyer, Tejun Heo, linux-api
On Mon, Jul 27, 2015 at 05:41:57AM -0400, Ming Lei wrote:
> > Why the hardcoded value? I suspect this should be more like:
> >
> > if (dio && inode->i_sb->s_bdev &&
> > (lo->lo_offset & (bdev_io_min(inode->i_sb->s_bdev) - 1)) != 0)
> > dio = false;
>
> The above can't work if the backing device has a bigger sector size
> (such as 4K), that is why loop's direct-io requires 512 min_io_size of
> backing device.
Why doesn't it work? If the backing device sector size is 4k
and lo_offset is 0 or a multiple of 4k it should allow direct I/O,
and my code sniplet will allow that.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
[not found] ` <20150727094530.GA15507-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
@ 2015-07-27 9:53 ` Ming Lei
2015-07-27 17:33 ` Christoph Hellwig
[not found] ` <CACVXFVMKycx768HtAJXMgEZNjsQrNm_f3UzW9kUysSHAMM5FPQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 2 replies; 13+ messages in thread
From: Ming Lei @ 2015-07-27 9:53 UTC (permalink / raw)
To: Christoph Hellwig, Dave Chinner
Cc: Jens Axboe, Linux Kernel Mailing List, Justin M. Forbes,
Jeff Moyer, Tejun Heo, linux-api-u79uwXL29TY76Z2rM5mHXA
On Mon, Jul 27, 2015 at 5:45 AM, Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org> wrote:
> On Mon, Jul 27, 2015 at 05:41:57AM -0400, Ming Lei wrote:
>> > Why the hardcoded value? I suspect this should be more like:
>> >
>> > if (dio && inode->i_sb->s_bdev &&
>> > (lo->lo_offset & (bdev_io_min(inode->i_sb->s_bdev) - 1)) != 0)
>> > dio = false;
>>
>> The above can't work if the backing device has a bigger sector size
>> (such as 4K), that is why loop's direct-io requires 512 min_io_size of
>> backing device.
>
> Why doesn't it work? If the backing device sector size is 4k
> and lo_offset is 0 or a multiple of 4k it should allow direct I/O,
> and my code sniplet will allow that.
Because size has to be 4k aligned too.
And it can't work in the example posted by Dave Chinner:
> I have a 4k sector backing device and a 512 byte sector filesystem
> image. I can't do 512 byte direct IO to the filesystem image, so I
> can't run tools that handle fs images in files using direct Io on
> that file. Create a loop device with the filesystem image, and now I
> can do 512 byte direct IO to the filesystem image, because all that
> direct IO to the filesystem image is now buffered by the loop
> device.
>
> If the loop device does direct io in this situation, the backing
> filesystem rejects direct IO from the loop device because it is not
> sector (4k) sized/aligned. User now swears, shouts and curses you
> from afar.
Thanks,
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
2015-07-27 9:53 ` Ming Lei
@ 2015-07-27 17:33 ` Christoph Hellwig
2015-07-29 7:33 ` Ming Lei
[not found] ` <CACVXFVMKycx768HtAJXMgEZNjsQrNm_f3UzW9kUysSHAMM5FPQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
1 sibling, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2015-07-27 17:33 UTC (permalink / raw)
To: Ming Lei
Cc: Christoph Hellwig, Dave Chinner, Jens Axboe,
Linux Kernel Mailing List, Justin M. Forbes, Jeff Moyer,
Tejun Heo, linux-api
On Mon, Jul 27, 2015 at 05:53:33AM -0400, Ming Lei wrote:
> Because size has to be 4k aligned too.
Yes. But again I don't see any reason to limit us to a hardcoded 512
byte block size here, especially considering the patches to finally
allow enabling other block sizes from userspace.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
[not found] ` <CACVXFVMKycx768HtAJXMgEZNjsQrNm_f3UzW9kUysSHAMM5FPQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-07-27 22:06 ` Dave Chinner
0 siblings, 0 replies; 13+ messages in thread
From: Dave Chinner @ 2015-07-27 22:06 UTC (permalink / raw)
To: Ming Lei
Cc: Christoph Hellwig, Jens Axboe, Linux Kernel Mailing List,
Justin M. Forbes, Jeff Moyer, Tejun Heo,
linux-api-u79uwXL29TY76Z2rM5mHXA
On Mon, Jul 27, 2015 at 05:53:33AM -0400, Ming Lei wrote:
> On Mon, Jul 27, 2015 at 5:45 AM, Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org> wrote:
> > On Mon, Jul 27, 2015 at 05:41:57AM -0400, Ming Lei wrote:
> >> > Why the hardcoded value? I suspect this should be more like:
> >> >
> >> > if (dio && inode->i_sb->s_bdev &&
> >> > (lo->lo_offset & (bdev_io_min(inode->i_sb->s_bdev) - 1)) != 0)
> >> > dio = false;
> >>
> >> The above can't work if the backing device has a bigger sector size
> >> (such as 4K), that is why loop's direct-io requires 512 min_io_size of
> >> backing device.
> >
> > Why doesn't it work? If the backing device sector size is 4k
> > and lo_offset is 0 or a multiple of 4k it should allow direct I/O,
> > and my code sniplet will allow that.
>
> Because size has to be 4k aligned too.
So check that, too. Any >= 4k block size filesystem should be doing
mostly 4k aligned and sized IO...
Cheers,
Dave.
--
Dave Chinner
david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
2015-07-27 17:33 ` Christoph Hellwig
@ 2015-07-29 7:33 ` Ming Lei
2015-07-29 8:41 ` Dave Chinner
0 siblings, 1 reply; 13+ messages in thread
From: Ming Lei @ 2015-07-29 7:33 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Dave Chinner, Jens Axboe, Linux Kernel Mailing List,
Justin M. Forbes, Jeff Moyer, Tejun Heo, linux-api
On Mon, Jul 27, 2015 at 1:33 PM, Christoph Hellwig <hch@infradead.org> wrote:
> On Mon, Jul 27, 2015 at 05:53:33AM -0400, Ming Lei wrote:
>> Because size has to be 4k aligned too.
>
> Yes. But again I don't see any reason to limit us to a hardcoded 512
> byte block size here, especially considering the patches to finally
>From loop block's view, the request size can be any count of 512-byte
sectors, then the transfer size to backing device can't guarantee to be
4k aligned always.
> allow enabling other block sizes from userspace.
I have some questions about the patchset, and looks the author doesn't
reply it yet.
On Mon, Jul 27, 2015 at 6:06 PM, Dave Chinner <david@fromorbit.com> wrote:
>> Because size has to be 4k aligned too.
>
> So check that, too. Any >= 4k block size filesystem should be doing
> mostly 4k aligned and sized IO...
I guess you mean we only use direct IO for the 4k aligned and sized IO?
If so, that won't be efficient because the page cache has to be flushed
during the switch.
Thanks,
Ming
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
2015-07-29 7:33 ` Ming Lei
@ 2015-07-29 8:41 ` Dave Chinner
2015-07-29 11:21 ` Ming Lei
0 siblings, 1 reply; 13+ messages in thread
From: Dave Chinner @ 2015-07-29 8:41 UTC (permalink / raw)
To: Ming Lei
Cc: Christoph Hellwig, Jens Axboe, Linux Kernel Mailing List,
Justin M. Forbes, Jeff Moyer, Tejun Heo, linux-api
On Wed, Jul 29, 2015 at 03:33:52AM -0400, Ming Lei wrote:
> On Mon, Jul 27, 2015 at 1:33 PM, Christoph Hellwig <hch@infradead.org> wrote:
> > On Mon, Jul 27, 2015 at 05:53:33AM -0400, Ming Lei wrote:
> >> Because size has to be 4k aligned too.
> >
> > Yes. But again I don't see any reason to limit us to a hardcoded 512
> > byte block size here, especially considering the patches to finally
>
> From loop block's view, the request size can be any count of 512-byte
> sectors, then the transfer size to backing device can't guarantee to be
> 4k aligned always.
In theory, yes. In practise, doesn't happen very often.
> > allow enabling other block sizes from userspace.
>
> I have some questions about the patchset, and looks the author doesn't
> reply it yet.
>
> On Mon, Jul 27, 2015 at 6:06 PM, Dave Chinner <david@fromorbit.com> wrote:
> >> Because size has to be 4k aligned too.
> >
> > So check that, too. Any >= 4k block size filesystem should be doing
> > mostly 4k aligned and sized IO...
>
> I guess you mean we only use direct IO for the 4k aligned and sized IO?
> If so, that won't be efficient because the page cache has to be flushed
> during the switch.
It will be extremely rare for a 4k block size filesystem to do
anything other than 4k aligned and sized IO. Think about it for a
minute: what does the page cache do to unaligned IO patterns (i.e.
buffered IO)? It does IO in page sizes, and so if the application
if doing badly aligned or sized IO with buffered IO, then the
underlying device will only ever size page sized and aligned IO.
Hence sector aligned IO will only come from applications doing
direct IO. If the application is doing direct IO and it's not
properly aligned, then it already is going to get sucky performance
because most filesystem serialise sub-block size direct IO because
concurrent sub-block IOs to the same block usually leads to data
corruption.
So, really, sector aligned/sized direct IO is a sucky performance
path before we even get to the loop device, so we don't really need
to care how fast the loop device handles this case. The loop device
just needs to ensure that it doesn't corrupt data when badly aligned
IOs come in... ;)
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
2015-07-29 8:41 ` Dave Chinner
@ 2015-07-29 11:21 ` Ming Lei
[not found] ` <CACVXFVMOuCk0bHZfrV=VZWLtgsa4oWxrpnu6aoB1LKZ50UMhZA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 13+ messages in thread
From: Ming Lei @ 2015-07-29 11:21 UTC (permalink / raw)
To: Dave Chinner
Cc: Christoph Hellwig, Jens Axboe, Linux Kernel Mailing List,
Justin M. Forbes, Jeff Moyer, Tejun Heo,
linux-api-u79uwXL29TY76Z2rM5mHXA
On Wed, Jul 29, 2015 at 4:41 AM, Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org> wrote:
> On Wed, Jul 29, 2015 at 03:33:52AM -0400, Ming Lei wrote:
>> On Mon, Jul 27, 2015 at 1:33 PM, Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org> wrote:
>> > On Mon, Jul 27, 2015 at 05:53:33AM -0400, Ming Lei wrote:
>> >> Because size has to be 4k aligned too.
>> >
>> > Yes. But again I don't see any reason to limit us to a hardcoded 512
>> > byte block size here, especially considering the patches to finally
>>
>> From loop block's view, the request size can be any count of 512-byte
>> sectors, then the transfer size to backing device can't guarantee to be
>> 4k aligned always.
>
> In theory, yes. In practise, doesn't happen very often.
>
>> > allow enabling other block sizes from userspace.
>>
>> I have some questions about the patchset, and looks the author doesn't
>> reply it yet.
>>
>> On Mon, Jul 27, 2015 at 6:06 PM, Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org> wrote:
>> >> Because size has to be 4k aligned too.
>> >
>> > So check that, too. Any >= 4k block size filesystem should be doing
>> > mostly 4k aligned and sized IO...
>>
>> I guess you mean we only use direct IO for the 4k aligned and sized IO?
>> If so, that won't be efficient because the page cache has to be flushed
>> during the switch.
>
> It will be extremely rare for a 4k block size filesystem to do
> anything other than 4k aligned and sized IO. Think about it for a
> minute: what does the page cache do to unaligned IO patterns (i.e.
> buffered IO)? It does IO in page sizes, and so if the application
> if doing badly aligned or sized IO with buffered IO, then the
> underlying device will only ever size page sized and aligned IO.
>
> Hence sector aligned IO will only come from applications doing
> direct IO. If the application is doing direct IO and it's not
> properly aligned, then it already is going to get sucky performance
> because most filesystem serialise sub-block size direct IO because
> concurrent sub-block IOs to the same block usually leads to data
> corruption.
The blocksize of filesysten over loop can be 512, 1024, 2048, and
suppose sector size of backing device is 4096, then filesystem
can see aligned direct IO when IO size/offset from application is aligned
with fs block size, but loop still can't do direct IO for all this
kind of requests
against backing file.
Another case is that application may access loop block directly, such
as 'dd if=/dev/loopN', but it may not be common, and maybe it needn't
to consider.
Thanks,
>
> So, really, sector aligned/sized direct IO is a sucky performance
> path before we even get to the loop device, so we don't really need
> to care how fast the loop device handles this case. The loop device
> just needs to ensure that it doesn't corrupt data when badly aligned
> IOs come in... ;)
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
[not found] ` <CACVXFVMOuCk0bHZfrV=VZWLtgsa4oWxrpnu6aoB1LKZ50UMhZA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-07-29 22:08 ` Dave Chinner
2015-07-30 8:01 ` Ming Lei
0 siblings, 1 reply; 13+ messages in thread
From: Dave Chinner @ 2015-07-29 22:08 UTC (permalink / raw)
To: Ming Lei
Cc: Christoph Hellwig, Jens Axboe, Linux Kernel Mailing List,
Justin M. Forbes, Jeff Moyer, Tejun Heo,
linux-api-u79uwXL29TY76Z2rM5mHXA
On Wed, Jul 29, 2015 at 07:21:47AM -0400, Ming Lei wrote:
> On Wed, Jul 29, 2015 at 4:41 AM, Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org> wrote:
> > On Wed, Jul 29, 2015 at 03:33:52AM -0400, Ming Lei wrote:
> >> On Mon, Jul 27, 2015 at 1:33 PM, Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org> wrote:
> >> > On Mon, Jul 27, 2015 at 05:53:33AM -0400, Ming Lei wrote:
> >> >> Because size has to be 4k aligned too.
> >> >
> >> > Yes. But again I don't see any reason to limit us to a hardcoded 512
> >> > byte block size here, especially considering the patches to finally
> >>
> >> From loop block's view, the request size can be any count of 512-byte
> >> sectors, then the transfer size to backing device can't guarantee to be
> >> 4k aligned always.
> >
> > In theory, yes. In practise, doesn't happen very often.
> >
> >> > allow enabling other block sizes from userspace.
> >>
> >> I have some questions about the patchset, and looks the author doesn't
> >> reply it yet.
> >>
> >> On Mon, Jul 27, 2015 at 6:06 PM, Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org> wrote:
> >> >> Because size has to be 4k aligned too.
> >> >
> >> > So check that, too. Any >= 4k block size filesystem should be doing
> >> > mostly 4k aligned and sized IO...
> >>
> >> I guess you mean we only use direct IO for the 4k aligned and sized IO?
> >> If so, that won't be efficient because the page cache has to be flushed
> >> during the switch.
> >
> > It will be extremely rare for a 4k block size filesystem to do
> > anything other than 4k aligned and sized IO. Think about it for a
> > minute: what does the page cache do to unaligned IO patterns (i.e.
> > buffered IO)? It does IO in page sizes, and so if the application
> > if doing badly aligned or sized IO with buffered IO, then the
> > underlying device will only ever size page sized and aligned IO.
> >
> > Hence sector aligned IO will only come from applications doing
> > direct IO. If the application is doing direct IO and it's not
> > properly aligned, then it already is going to get sucky performance
> > because most filesystem serialise sub-block size direct IO because
> > concurrent sub-block IOs to the same block usually leads to data
> > corruption.
>
> The blocksize of filesysten over loop can be 512, 1024, 2048, and
> suppose sector size of backing device is 4096, then filesystem
> can see aligned direct IO when IO size/offset from application is aligned
> with fs block size, but loop still can't do direct IO for all this
> kind of requests
> against backing file.
Sure, but again you're talking about a fairly rare configuration.
The vast majority of filesystems use 4k block sizes, just like the
vast majority of applications use buffered IO. Don't jump through
hoops to optimise a case that probably doesn't need optimising. Make
it work correctly first, then optimise performance later when
someone has a need for it to be really fast.
> Another case is that application may access loop block directly, such
> as 'dd if=/dev/loopN', but it may not be common, and maybe it needn't
> to consider.
'dd if=/dev/loopN bs=4k....'
Cheers,
Dave.
--
Dave Chinner
david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v7 4/6] block: loop: prepare for supporing direct IO
2015-07-29 22:08 ` Dave Chinner
@ 2015-07-30 8:01 ` Ming Lei
0 siblings, 0 replies; 13+ messages in thread
From: Ming Lei @ 2015-07-30 8:01 UTC (permalink / raw)
To: Dave Chinner
Cc: Christoph Hellwig, Jens Axboe, Linux Kernel Mailing List,
Justin M. Forbes, Jeff Moyer, Tejun Heo,
linux-api-u79uwXL29TY76Z2rM5mHXA
On Wed, Jul 29, 2015 at 6:08 PM, Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org> wrote:
> On Wed, Jul 29, 2015 at 07:21:47AM -0400, Ming Lei wrote:
>> On Wed, Jul 29, 2015 at 4:41 AM, Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org> wrote:
>> > On Wed, Jul 29, 2015 at 03:33:52AM -0400, Ming Lei wrote:
>> >> On Mon, Jul 27, 2015 at 1:33 PM, Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org> wrote:
>> >> > On Mon, Jul 27, 2015 at 05:53:33AM -0400, Ming Lei wrote:
>> >> >> Because size has to be 4k aligned too.
>> >> >
>> >> > Yes. But again I don't see any reason to limit us to a hardcoded 512
>> >> > byte block size here, especially considering the patches to finally
>> >>
>> >> From loop block's view, the request size can be any count of 512-byte
>> >> sectors, then the transfer size to backing device can't guarantee to be
>> >> 4k aligned always.
>> >
>> > In theory, yes. In practise, doesn't happen very often.
>> >
>> >> > allow enabling other block sizes from userspace.
>> >>
>> >> I have some questions about the patchset, and looks the author doesn't
>> >> reply it yet.
>> >>
>> >> On Mon, Jul 27, 2015 at 6:06 PM, Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org> wrote:
>> >> >> Because size has to be 4k aligned too.
>> >> >
>> >> > So check that, too. Any >= 4k block size filesystem should be doing
>> >> > mostly 4k aligned and sized IO...
>> >>
>> >> I guess you mean we only use direct IO for the 4k aligned and sized IO?
>> >> If so, that won't be efficient because the page cache has to be flushed
>> >> during the switch.
>> >
>> > It will be extremely rare for a 4k block size filesystem to do
>> > anything other than 4k aligned and sized IO. Think about it for a
>> > minute: what does the page cache do to unaligned IO patterns (i.e.
>> > buffered IO)? It does IO in page sizes, and so if the application
>> > if doing badly aligned or sized IO with buffered IO, then the
>> > underlying device will only ever size page sized and aligned IO.
>> >
>> > Hence sector aligned IO will only come from applications doing
>> > direct IO. If the application is doing direct IO and it's not
>> > properly aligned, then it already is going to get sucky performance
>> > because most filesystem serialise sub-block size direct IO because
>> > concurrent sub-block IOs to the same block usually leads to data
>> > corruption.
>>
>> The blocksize of filesysten over loop can be 512, 1024, 2048, and
>> suppose sector size of backing device is 4096, then filesystem
>> can see aligned direct IO when IO size/offset from application is aligned
>> with fs block size, but loop still can't do direct IO for all this
>> kind of requests
>> against backing file.
>
> Sure, but again you're talking about a fairly rare configuration.
> The vast majority of filesystems use 4k block sizes, just like the
> vast majority of applications use buffered IO. Don't jump through
> hoops to optimise a case that probably doesn't need optimising. Make
> it work correctly first, then optimise performance later when
> someone has a need for it to be really fast.
OK, I will support 1024, 2048 and 4096 sector size in v8.
>
>> Another case is that application may access loop block directly, such
>> as 'dd if=/dev/loopN', but it may not be common, and maybe it needn't
>> to consider.
>
> 'dd if=/dev/loopN bs=4k....'
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2015-07-30 8:01 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <1437061068-26118-1-git-send-email-ming.lei@canonical.com>
2015-07-16 15:37 ` [PATCH v7 4/6] block: loop: prepare for supporing direct IO Ming Lei
[not found] ` <1437061068-26118-5-git-send-email-ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-07-27 8:40 ` Christoph Hellwig
[not found] ` <20150727084020.GA28336-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2015-07-27 9:41 ` Ming Lei
2015-07-27 9:45 ` Christoph Hellwig
[not found] ` <20150727094530.GA15507-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2015-07-27 9:53 ` Ming Lei
2015-07-27 17:33 ` Christoph Hellwig
2015-07-29 7:33 ` Ming Lei
2015-07-29 8:41 ` Dave Chinner
2015-07-29 11:21 ` Ming Lei
[not found] ` <CACVXFVMOuCk0bHZfrV=VZWLtgsa4oWxrpnu6aoB1LKZ50UMhZA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-29 22:08 ` Dave Chinner
2015-07-30 8:01 ` Ming Lei
[not found] ` <CACVXFVMKycx768HtAJXMgEZNjsQrNm_f3UzW9kUysSHAMM5FPQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-07-27 22:06 ` Dave Chinner
2015-07-16 15:37 ` [PATCH v7 5/6] block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO Ming Lei
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).