diff for duplicates of <4CD001A2.4000408@linux.vnet.ibm.com> diff --git a/a/1.txt b/N1/1.txt index 5333955..9280d18 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,184 +1,116 @@ Hi, -this is about an issue newer kernels show, bysplitting direct I/O reque= -sts -into 4k pieces to directly merge them in the Block Device Layer afterwa= -rds. +this is about an issue newer kernels show, bysplitting direct I/O requests +into 4k pieces to directly merge them in the Block Device Layer afterwards. If anyone is interested in own tests just use a simple command like -dd if=3D/mnt/test/test-dd1 of=3D/dev/null iflag=3Ddirect bs=3D64k count= -=3D1 +dd if=/mnt/test/test-dd1 of=/dev/null iflag=direct bs=64k count=1 in combination with blktrace. -The following patch is more a proposal for discussion than a solution, = -well +The following patch is more a proposal for discussion than a solution, well thats what RFC's are about right. -I'm unsure about names, but also if the approach in general is the righ= -t way. +I'm unsure about names, but also if the approach in general is the right way. It should apply to every 2.6.36 and 2.6.37 kernel. -I put everyone on CC who was involved in the patches leading to the cur= -rent +I put everyone on CC who was involved in the patches leading to the current behavior. 
-Gr=FCsse / regards, +Grüsse / regards, Christian Ehrhardt -IBM Linux Technology Center, System z Linux Performance=20 +IBM Linux Technology Center, System z Linux Performance --- cut here --- -Subject: [RFC][PATCH] direct-io: btrfs: avoid splitting dio requests fo= -r non-btrfs filesystems +Subject: [RFC][PATCH] direct-io: btrfs: avoid splitting dio requests for non-btrfs filesystems -=46rom: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> +From: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> -Commit c2c6ca41 by Josef Bacik <josef@redhat.com> caused all direct I/O= -'s to +Commit c2c6ca41 by Josef Bacik <josef@redhat.com> caused all direct I/O's to be split into 4k requests before arriving in the block device layer. This was later on partially fixed by Jeff Moyer <jmoyer@redhat.com> in 7a801ac6. -Jeffs fix improved the situation a lot, but eventually it still splits = -I/Os +Jeffs fix improved the situation a lot, but eventually it still splits I/Os for non-btrfs file systems as well were it wouldn't have to. -Eventually in my example on a ext2 filesystem it splits it every 4Mb wh= -ere +Eventually in my example on a ext2 filesystem it splits it every 4Mb where dio->boundary is evaluated to true. 
In blktrace this looks like: - dd-910 [002] 38.762523: 94,8 A R 131264 + 8 <-= - (94,9) 131072 - dd-910 [002] 38.762531: 94,8 Q R 131264 + 8 [d= -d] - dd-910 [002] 38.762535: 94,8 G R 131264 + 8 [d= -d] + dd-910 [002] 38.762523: 94,8 A R 131264 + 8 <- (94,9) 131072 + dd-910 [002] 38.762531: 94,8 Q R 131264 + 8 [dd] + dd-910 [002] 38.762535: 94,8 G R 131264 + 8 [dd] dd-910 [002] 38.762537: 94,8 P N [dd] - dd-910 [002] 38.762539: 94,8 I R 131264 + 8 [d= -d] - dd-910 [002] 38.762544: 94,8 A R 131272 + 8 <-= - (94,9) 131080 - dd-910 [002] 38.762544: 94,8 Q R 131272 + 8 [d= -d] - dd-910 [002] 38.762546: 94,8 M R 131272 + 8 [d= -d] - dd-910 [002] 38.762550: 94,8 A R 131280 + 8 <-= - (94,9) 131088 - dd-910 [002] 38.762551: 94,8 Q R 131280 + 8 [d= -d] - dd-910 [002] 38.762551: 94,8 M R 131280 + 8 [d= -d] - dd-910 [002] 38.762556: 94,8 A R 131288 + 8 <-= - (94,9) 131096 - dd-910 [002] 38.762557: 94,8 Q R 131288 + 8 [d= -d] - dd-910 [002] 38.762557: 94,8 M R 131288 + 8 [d= -d] - dd-910 [002] 38.762562: 94,8 A R 131296 + 8 <-= - (94,9) 131104 - dd-910 [002] 38.762563: 94,8 Q R 131296 + 8 [d= -d] - dd-910 [002] 38.762564: 94,8 M R 131296 + 8 [d= -d] - dd-910 [002] 38.762568: 94,8 A R 131304 + 8 <-= - (94,9) 131112 - dd-910 [002] 38.762569: 94,8 Q R 131304 + 8 [d= -d] - dd-910 [002] 38.762570: 94,8 M R 131304 + 8 [d= -d] - dd-910 [002] 38.762577: 94,8 A R 131312 + 8 <-= - (94,9) 131120 - dd-910 [002] 38.762578: 94,8 Q R 131312 + 8 [d= -d] - dd-910 [002] 38.762579: 94,8 M R 131312 + 8 [d= -d] - dd-910 [002] 38.762584: 94,8 A R 131320 + 8 <-= - (94,9) 131128 - dd-910 [002] 38.762584: 94,8 Q R 131320 + 8 [d= -d] - dd-910 [002] 38.762585: 94,8 M R 131320 + 8 [d= -d] - dd-910 [002] 38.762590: 94,8 A R 131328 + 8 <-= - (94,9) 131136 - dd-910 [002] 38.762590: 94,8 Q R 131328 + 8 [d= -d] - dd-910 [002] 38.762591: 94,8 M R 131328 + 8 [d= -d] - dd-910 [002] 38.762596: 94,8 A R 131336 + 8 <-= - (94,9) 131144 - dd-910 [002] 38.762597: 94,8 Q R 131336 + 8 [d= -d] - dd-910 [002] 38.762598: 
94,8 M R 131336 + 8 [d= -d] - dd-910 [002] 38.762605: 94,8 A R 131344 + 16 <= -- (94,9) 131152 - dd-910 [002] 38.762607: 94,8 Q R 131344 + 16 [= -dd] - dd-910 [002] 38.762608: 94,8 M R 131344 + 16 [= -dd] - dd-910 [002] 38.762611: 94,8 A R 131368 + 32 <= -- (94,9) 131176 - dd-910 [002] 38.762612: 94,8 Q R 131368 + 32 [= -dd] - dd-910 [002] 38.762616: 94,8 G R 131368 + 32 [= -dd] - dd-910 [002] 38.762617: 94,8 I R 131368 + 32 [= -dd] + dd-910 [002] 38.762539: 94,8 I R 131264 + 8 [dd] + dd-910 [002] 38.762544: 94,8 A R 131272 + 8 <- (94,9) 131080 + dd-910 [002] 38.762544: 94,8 Q R 131272 + 8 [dd] + dd-910 [002] 38.762546: 94,8 M R 131272 + 8 [dd] + dd-910 [002] 38.762550: 94,8 A R 131280 + 8 <- (94,9) 131088 + dd-910 [002] 38.762551: 94,8 Q R 131280 + 8 [dd] + dd-910 [002] 38.762551: 94,8 M R 131280 + 8 [dd] + dd-910 [002] 38.762556: 94,8 A R 131288 + 8 <- (94,9) 131096 + dd-910 [002] 38.762557: 94,8 Q R 131288 + 8 [dd] + dd-910 [002] 38.762557: 94,8 M R 131288 + 8 [dd] + dd-910 [002] 38.762562: 94,8 A R 131296 + 8 <- (94,9) 131104 + dd-910 [002] 38.762563: 94,8 Q R 131296 + 8 [dd] + dd-910 [002] 38.762564: 94,8 M R 131296 + 8 [dd] + dd-910 [002] 38.762568: 94,8 A R 131304 + 8 <- (94,9) 131112 + dd-910 [002] 38.762569: 94,8 Q R 131304 + 8 [dd] + dd-910 [002] 38.762570: 94,8 M R 131304 + 8 [dd] + dd-910 [002] 38.762577: 94,8 A R 131312 + 8 <- (94,9) 131120 + dd-910 [002] 38.762578: 94,8 Q R 131312 + 8 [dd] + dd-910 [002] 38.762579: 94,8 M R 131312 + 8 [dd] + dd-910 [002] 38.762584: 94,8 A R 131320 + 8 <- (94,9) 131128 + dd-910 [002] 38.762584: 94,8 Q R 131320 + 8 [dd] + dd-910 [002] 38.762585: 94,8 M R 131320 + 8 [dd] + dd-910 [002] 38.762590: 94,8 A R 131328 + 8 <- (94,9) 131136 + dd-910 [002] 38.762590: 94,8 Q R 131328 + 8 [dd] + dd-910 [002] 38.762591: 94,8 M R 131328 + 8 [dd] + dd-910 [002] 38.762596: 94,8 A R 131336 + 8 <- (94,9) 131144 + dd-910 [002] 38.762597: 94,8 Q R 131336 + 8 [dd] + dd-910 [002] 38.762598: 94,8 M R 131336 + 8 [dd] + dd-910 [002] 38.762605: 
94,8 A R 131344 + 16 <- (94,9) 131152 + dd-910 [002] 38.762607: 94,8 Q R 131344 + 16 [dd] + dd-910 [002] 38.762608: 94,8 M R 131344 + 16 [dd] + dd-910 [002] 38.762611: 94,8 A R 131368 + 32 <- (94,9) 131176 + dd-910 [002] 38.762612: 94,8 Q R 131368 + 32 [dd] + dd-910 [002] 38.762616: 94,8 G R 131368 + 32 [dd] + dd-910 [002] 38.762617: 94,8 I R 131368 + 32 [dd] dd-910 [002] 38.762619: 94,8 U N [dd] 2 - dd-910 [002] 38.762621: 94,8 D R 131264 + 96 [= -dd] - dd-910 [002] 38.762625: 94,8 D R 131368 + 32 [= -dd] - <idle>-0 [012] 38.763363: 94,8 C R 131264 + 96 [= -0]=20 - <idle>-0 [015] 38.763797: 94,8 C R 131368 + 32 [= -0] + dd-910 [002] 38.762621: 94,8 D R 131264 + 96 [dd] + dd-910 [002] 38.762625: 94,8 D R 131368 + 32 [dd] + <idle>-0 [012] 38.763363: 94,8 C R 131264 + 96 [0] + <idle>-0 [015] 38.763797: 94,8 C R 131368 + 32 [0] The usual behavior before both commits was: - dd-919 [002] 37.513685: 94,8 A R 7824 + 96 <- = -(94,9) 7632 - dd-919 [002] 37.513693: 94,8 Q R 7824 + 96 [dd= -] - dd-919 [002] 37.513697: 94,8 G R 7824 + 96 [dd= -] + dd-919 [002] 37.513685: 94,8 A R 7824 + 96 <- (94,9) 7632 + dd-919 [002] 37.513693: 94,8 Q R 7824 + 96 [dd] + dd-919 [002] 37.513697: 94,8 G R 7824 + 96 [dd] dd-919 [002] 37.513700: 94,8 P N [dd] - dd-919 [002] 37.513701: 94,8 I R 7824 + 96 [dd= -] - dd-919 [002] 37.513794: 94,8 A R 7928 + 32 <- = -(94,9) 7736 - dd-919 [002] 37.513795: 94,8 Q R 7928 + 32 [dd= -] - dd-919 [002] 37.513800: 94,8 G R 7928 + 32 [dd= -] - dd-919 [002] 37.513802: 94,8 I R 7928 + 32 [dd= -] + dd-919 [002] 37.513701: 94,8 I R 7824 + 96 [dd] + dd-919 [002] 37.513794: 94,8 A R 7928 + 32 <- (94,9) 7736 + dd-919 [002] 37.513795: 94,8 Q R 7928 + 32 [dd] + dd-919 [002] 37.513800: 94,8 G R 7928 + 32 [dd] + dd-919 [002] 37.513802: 94,8 I R 7928 + 32 [dd] dd-919 [002] 37.513804: 94,8 U N [dd] 2 - dd-919 [002] 37.513806: 94,8 D R 7824 + 96 [dd= -] - dd-919 [002] 37.513810: 94,8 D R 7928 + 32 [dd= -] - <idle>-0 [011] 37.514362: 94,8 C R 7824 + 96 [0]= -=20 + dd-919 
[002] 37.513806: 94,8 D R 7824 + 96 [dd] + dd-919 [002] 37.513810: 94,8 D R 7928 + 32 [dd] + <idle>-0 [011] 37.514362: 94,8 C R 7824 + 96 [0] <idle>-0 [014] 37.514728: 94,8 C R 7928 + 32 [0] That remaining split is cause by the test for: - "dio->final_block_in_bio !=3D dio->cur_page_block". + "dio->final_block_in_bio != dio->cur_page_block". As this was in the code for a long time I just assume it is right. -So eventually for the 64k request in the example this patch improves th= -e +So eventually for the 64k request in the example this patch improves the effective submissions that get to the block device layer from: 10x4k, 1x8k, 1x16k to 1x48k & 1x16k which is much better. -Througput impact is small, but in terms of cpu consumption this is visi= -ble +Througput impact is small, but in terms of cpu consumption this is visible by a single digit percentage depending on the incoming request size. -The solution looking for comments or alternatives in this RFC patch add= -s a new -kiocb flag that let filesystems specify if they need these workaround t= -o -separate meta data reads - if not, like all pre-btrfs filesystems the d= -io code +The solution looking for comments or alternatives in this RFC patch adds a new +kiocb flag that let filesystems specify if they need these workaround to +separate meta data reads - if not, like all pre-btrfs filesystems the dio code doesn't have to cause this extra work. 
Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> @@ -203,15 +135,14 @@ index c038644..1126185 100644 #include "compat.h" #include "ctree.h" #include "disk-io.h" -@@ -5822,6 +5823,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kio= -cb *iocb, +@@ -5822,6 +5823,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, free_extent_state(cached_state); - cached_state =3D NULL; -=20 + cached_state = NULL; + + /* btrfs cannot handle logically non-contiguous requests */ + kiocb_set_separate_meta_reads(iocb); + - ret =3D __blockdev_direct_IO(rw, iocb, inode, + ret = __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, diff --git a/fs/direct-io.c b/fs/direct-io.c @@ -224,7 +155,7 @@ index 48d74c7..6d2dcb2 100644 #include <linux/uio.h> +#include <linux/aio.h> #include <asm/atomic.h> -=20 + /* @@ -79,6 +80,7 @@ struct dio { sector_t final_block_in_request;/* doesn't change */ @@ -242,18 +173,17 @@ index 48d74c7..6d2dcb2 100644 + else if (dio->separate_meta_reads && dio->boundary) dio_bio_submit(dio); } -=20 -@@ -1245,6 +1247,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb,= - struct inode *inode, - dio->is_async =3D !is_sync_kiocb(iocb) && !((rw & WRITE) && + +@@ -1245,6 +1247,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && (end > i_size_read(inode))); -=20 + + /* + * some filesystems e.g. 
btrfs need to separate metadata read + */ -+ dio->separate_meta_reads =3D kiocb_needs_separate_meta_reads(iocb); ++ dio->separate_meta_reads = kiocb_needs_separate_meta_reads(iocb); + - retval =3D direct_io_worker(rw, iocb, inode, iov, offset, + retval = direct_io_worker(rw, iocb, inode, iov, offset, nr_segs, blkbits, get_block, end_io, submit_io, dio); diff --git a/include/linux/aio.h b/include/linux/aio.h @@ -266,26 +196,16 @@ index 7a8db41..9ee9c6e 100644 #define KIF_CANCELLED 2 +/* to separate meta reads */ +#define KIF_SEPARATE_META 3 -=20 - #define kiocbTryLock(iocb) test_and_set_bit(KIF_LOCKED, &(iocb)->ki_fl= -ags) - #define kiocbTryKick(iocb) test_and_set_bit(KIF_KICKED, &(iocb)->ki_fl= -ags) + + #define kiocbTryLock(iocb) test_and_set_bit(KIF_LOCKED, &(iocb)->ki_flags) + #define kiocbTryKick(iocb) test_and_set_bit(KIF_KICKED, &(iocb)->ki_flags) @@ -50,6 +52,9 @@ struct kioctx; #define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags) - #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_fla= -gs) -=20 -+#define kiocb_set_separate_meta_reads(iocb) set_bit(KIF_SEPARATE_META,= - &(iocb)->ki_flags) -+#define kiocb_needs_separate_meta_reads(iocb) (KIF_SEPARATE_META & (io= -cb)->ki_flags) + #define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags) + ++#define kiocb_set_separate_meta_reads(iocb) set_bit(KIF_SEPARATE_META, &(iocb)->ki_flags) ++#define kiocb_needs_separate_meta_reads(iocb) (KIF_SEPARATE_META & (iocb)->ki_flags) + /* is there a better place to document function pointer methods? 
*/ /** * ki_retry - iocb forward progress callback --- -To unsubscribe from this list: send the line "unsubscribe linux-btrfs" = -in -the body of a message to majordomo@vger.kernel.org -More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/a/content_digest b/N1/content_digest index 2e45f41..6fbcc86 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -5,190 +5,123 @@ Josef Bacik <jbacik@redhat.com> Chris Mason <chris.mason@oracle.com> linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org> - " linux-btrf\0" + linux-btrfs@vger.kernel.org + " linux-fsdevel@vger.kernel.org <linux-fsdevel@vger.kernel.org>\0" "\00:1\0" "b\0" "Hi,\n" - "this is about an issue newer kernels show, bysplitting direct I/O reque=\n" - "sts\n" - "into 4k pieces to directly merge them in the Block Device Layer afterwa=\n" - "rds.\n" + "this is about an issue newer kernels show, bysplitting direct I/O requests\n" + "into 4k pieces to directly merge them in the Block Device Layer afterwards.\n" "\n" "If anyone is interested in own tests just use a simple command like\n" - "dd if=3D/mnt/test/test-dd1 of=3D/dev/null iflag=3Ddirect bs=3D64k count=\n" - "=3D1\n" + "dd if=/mnt/test/test-dd1 of=/dev/null iflag=direct bs=64k count=1\n" "in combination with blktrace.\n" "\n" - "The following patch is more a proposal for discussion than a solution, =\n" - "well\n" + "The following patch is more a proposal for discussion than a solution, well\n" "thats what RFC's are about right.\n" - "I'm unsure about names, but also if the approach in general is the righ=\n" - "t way.\n" + "I'm unsure about names, but also if the approach in general is the right way.\n" "It should apply to every 2.6.36 and 2.6.37 kernel.\n" "\n" - "I put everyone on CC who was involved in the patches leading to the cur=\n" - "rent\n" + "I put everyone on CC who was involved in the patches leading to the current\n" "behavior.\n" "\n" - "Gr=FCsse / regards,\n" + "Gr\303\274sse / regards,\n" "Christian 
Ehrhardt\n" - "IBM Linux Technology Center, System z Linux Performance=20\n" + "IBM Linux Technology Center, System z Linux Performance \n" "\n" "--- cut here ---\n" "\n" - "Subject: [RFC][PATCH] direct-io: btrfs: avoid splitting dio requests fo=\n" - "r non-btrfs filesystems\n" + "Subject: [RFC][PATCH] direct-io: btrfs: avoid splitting dio requests for non-btrfs filesystems\n" "\n" - "=46rom: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>\n" + "From: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>\n" "\n" - "Commit c2c6ca41 by Josef Bacik <josef@redhat.com> caused all direct I/O=\n" - "'s to\n" + "Commit c2c6ca41 by Josef Bacik <josef@redhat.com> caused all direct I/O's to\n" "be split into 4k requests before arriving in the block device layer.\n" "This was later on partially fixed by Jeff Moyer <jmoyer@redhat.com> in\n" "7a801ac6.\n" "\n" - "Jeffs fix improved the situation a lot, but eventually it still splits =\n" - "I/Os\n" + "Jeffs fix improved the situation a lot, but eventually it still splits I/Os\n" "for non-btrfs file systems as well were it wouldn't have to.\n" "\n" - "Eventually in my example on a ext2 filesystem it splits it every 4Mb wh=\n" - "ere\n" + "Eventually in my example on a ext2 filesystem it splits it every 4Mb where\n" "dio->boundary is evaluated to true.\n" "\n" "In blktrace this looks like:\n" - " dd-910 [002] 38.762523: 94,8 A R 131264 + 8 <-=\n" - " (94,9) 131072\n" - " dd-910 [002] 38.762531: 94,8 Q R 131264 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762535: 94,8 G R 131264 + 8 [d=\n" - "d]\n" + " dd-910 [002] 38.762523: 94,8 A R 131264 + 8 <- (94,9) 131072\n" + " dd-910 [002] 38.762531: 94,8 Q R 131264 + 8 [dd]\n" + " dd-910 [002] 38.762535: 94,8 G R 131264 + 8 [dd]\n" " dd-910 [002] 38.762537: 94,8 P N [dd]\n" - " dd-910 [002] 38.762539: 94,8 I R 131264 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762544: 94,8 A R 131272 + 8 <-=\n" - " (94,9) 131080\n" - " dd-910 [002] 38.762544: 94,8 Q R 131272 + 8 [d=\n" - "d]\n" - " dd-910 [002] 
38.762546: 94,8 M R 131272 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762550: 94,8 A R 131280 + 8 <-=\n" - " (94,9) 131088\n" - " dd-910 [002] 38.762551: 94,8 Q R 131280 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762551: 94,8 M R 131280 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762556: 94,8 A R 131288 + 8 <-=\n" - " (94,9) 131096\n" - " dd-910 [002] 38.762557: 94,8 Q R 131288 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762557: 94,8 M R 131288 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762562: 94,8 A R 131296 + 8 <-=\n" - " (94,9) 131104\n" - " dd-910 [002] 38.762563: 94,8 Q R 131296 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762564: 94,8 M R 131296 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762568: 94,8 A R 131304 + 8 <-=\n" - " (94,9) 131112\n" - " dd-910 [002] 38.762569: 94,8 Q R 131304 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762570: 94,8 M R 131304 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762577: 94,8 A R 131312 + 8 <-=\n" - " (94,9) 131120\n" - " dd-910 [002] 38.762578: 94,8 Q R 131312 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762579: 94,8 M R 131312 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762584: 94,8 A R 131320 + 8 <-=\n" - " (94,9) 131128\n" - " dd-910 [002] 38.762584: 94,8 Q R 131320 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762585: 94,8 M R 131320 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762590: 94,8 A R 131328 + 8 <-=\n" - " (94,9) 131136\n" - " dd-910 [002] 38.762590: 94,8 Q R 131328 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762591: 94,8 M R 131328 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762596: 94,8 A R 131336 + 8 <-=\n" - " (94,9) 131144\n" - " dd-910 [002] 38.762597: 94,8 Q R 131336 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762598: 94,8 M R 131336 + 8 [d=\n" - "d]\n" - " dd-910 [002] 38.762605: 94,8 A R 131344 + 16 <=\n" - "- (94,9) 131152\n" - " dd-910 [002] 38.762607: 94,8 Q R 131344 + 16 [=\n" - "dd]\n" - " dd-910 [002] 38.762608: 94,8 M R 131344 + 16 [=\n" - "dd]\n" - " dd-910 [002] 38.762611: 94,8 A R 131368 + 32 <=\n" - "- (94,9) 131176\n" - " dd-910 [002] 38.762612: 94,8 Q R 
131368 + 32 [=\n" - "dd]\n" - " dd-910 [002] 38.762616: 94,8 G R 131368 + 32 [=\n" - "dd]\n" - " dd-910 [002] 38.762617: 94,8 I R 131368 + 32 [=\n" - "dd]\n" + " dd-910 [002] 38.762539: 94,8 I R 131264 + 8 [dd]\n" + " dd-910 [002] 38.762544: 94,8 A R 131272 + 8 <- (94,9) 131080\n" + " dd-910 [002] 38.762544: 94,8 Q R 131272 + 8 [dd]\n" + " dd-910 [002] 38.762546: 94,8 M R 131272 + 8 [dd]\n" + " dd-910 [002] 38.762550: 94,8 A R 131280 + 8 <- (94,9) 131088\n" + " dd-910 [002] 38.762551: 94,8 Q R 131280 + 8 [dd]\n" + " dd-910 [002] 38.762551: 94,8 M R 131280 + 8 [dd]\n" + " dd-910 [002] 38.762556: 94,8 A R 131288 + 8 <- (94,9) 131096\n" + " dd-910 [002] 38.762557: 94,8 Q R 131288 + 8 [dd]\n" + " dd-910 [002] 38.762557: 94,8 M R 131288 + 8 [dd]\n" + " dd-910 [002] 38.762562: 94,8 A R 131296 + 8 <- (94,9) 131104\n" + " dd-910 [002] 38.762563: 94,8 Q R 131296 + 8 [dd]\n" + " dd-910 [002] 38.762564: 94,8 M R 131296 + 8 [dd]\n" + " dd-910 [002] 38.762568: 94,8 A R 131304 + 8 <- (94,9) 131112\n" + " dd-910 [002] 38.762569: 94,8 Q R 131304 + 8 [dd]\n" + " dd-910 [002] 38.762570: 94,8 M R 131304 + 8 [dd]\n" + " dd-910 [002] 38.762577: 94,8 A R 131312 + 8 <- (94,9) 131120\n" + " dd-910 [002] 38.762578: 94,8 Q R 131312 + 8 [dd]\n" + " dd-910 [002] 38.762579: 94,8 M R 131312 + 8 [dd]\n" + " dd-910 [002] 38.762584: 94,8 A R 131320 + 8 <- (94,9) 131128\n" + " dd-910 [002] 38.762584: 94,8 Q R 131320 + 8 [dd]\n" + " dd-910 [002] 38.762585: 94,8 M R 131320 + 8 [dd]\n" + " dd-910 [002] 38.762590: 94,8 A R 131328 + 8 <- (94,9) 131136\n" + " dd-910 [002] 38.762590: 94,8 Q R 131328 + 8 [dd]\n" + " dd-910 [002] 38.762591: 94,8 M R 131328 + 8 [dd]\n" + " dd-910 [002] 38.762596: 94,8 A R 131336 + 8 <- (94,9) 131144\n" + " dd-910 [002] 38.762597: 94,8 Q R 131336 + 8 [dd]\n" + " dd-910 [002] 38.762598: 94,8 M R 131336 + 8 [dd]\n" + " dd-910 [002] 38.762605: 94,8 A R 131344 + 16 <- (94,9) 131152\n" + " dd-910 [002] 38.762607: 94,8 Q R 131344 + 16 [dd]\n" + " dd-910 [002] 38.762608: 94,8 M R 
131344 + 16 [dd]\n" + " dd-910 [002] 38.762611: 94,8 A R 131368 + 32 <- (94,9) 131176\n" + " dd-910 [002] 38.762612: 94,8 Q R 131368 + 32 [dd]\n" + " dd-910 [002] 38.762616: 94,8 G R 131368 + 32 [dd]\n" + " dd-910 [002] 38.762617: 94,8 I R 131368 + 32 [dd]\n" " dd-910 [002] 38.762619: 94,8 U N [dd] 2\n" - " dd-910 [002] 38.762621: 94,8 D R 131264 + 96 [=\n" - "dd]\n" - " dd-910 [002] 38.762625: 94,8 D R 131368 + 32 [=\n" - "dd]\n" - " <idle>-0 [012] 38.763363: 94,8 C R 131264 + 96 [=\n" - "0]=20\n" - " <idle>-0 [015] 38.763797: 94,8 C R 131368 + 32 [=\n" - "0]\n" + " dd-910 [002] 38.762621: 94,8 D R 131264 + 96 [dd]\n" + " dd-910 [002] 38.762625: 94,8 D R 131368 + 32 [dd]\n" + " <idle>-0 [012] 38.763363: 94,8 C R 131264 + 96 [0] \n" + " <idle>-0 [015] 38.763797: 94,8 C R 131368 + 32 [0]\n" "\n" "The usual behavior before both commits was:\n" - " dd-919 [002] 37.513685: 94,8 A R 7824 + 96 <- =\n" - "(94,9) 7632\n" - " dd-919 [002] 37.513693: 94,8 Q R 7824 + 96 [dd=\n" - "]\n" - " dd-919 [002] 37.513697: 94,8 G R 7824 + 96 [dd=\n" - "]\n" + " dd-919 [002] 37.513685: 94,8 A R 7824 + 96 <- (94,9) 7632\n" + " dd-919 [002] 37.513693: 94,8 Q R 7824 + 96 [dd]\n" + " dd-919 [002] 37.513697: 94,8 G R 7824 + 96 [dd]\n" " dd-919 [002] 37.513700: 94,8 P N [dd]\n" - " dd-919 [002] 37.513701: 94,8 I R 7824 + 96 [dd=\n" - "]\n" - " dd-919 [002] 37.513794: 94,8 A R 7928 + 32 <- =\n" - "(94,9) 7736\n" - " dd-919 [002] 37.513795: 94,8 Q R 7928 + 32 [dd=\n" - "]\n" - " dd-919 [002] 37.513800: 94,8 G R 7928 + 32 [dd=\n" - "]\n" - " dd-919 [002] 37.513802: 94,8 I R 7928 + 32 [dd=\n" - "]\n" + " dd-919 [002] 37.513701: 94,8 I R 7824 + 96 [dd]\n" + " dd-919 [002] 37.513794: 94,8 A R 7928 + 32 <- (94,9) 7736\n" + " dd-919 [002] 37.513795: 94,8 Q R 7928 + 32 [dd]\n" + " dd-919 [002] 37.513800: 94,8 G R 7928 + 32 [dd]\n" + " dd-919 [002] 37.513802: 94,8 I R 7928 + 32 [dd]\n" " dd-919 [002] 37.513804: 94,8 U N [dd] 2\n" - " dd-919 [002] 37.513806: 94,8 D R 7824 + 96 [dd=\n" - "]\n" - " dd-919 
[002] 37.513810: 94,8 D R 7928 + 32 [dd=\n" - "]\n" - " <idle>-0 [011] 37.514362: 94,8 C R 7824 + 96 [0]=\n" - "=20\n" + " dd-919 [002] 37.513806: 94,8 D R 7824 + 96 [dd]\n" + " dd-919 [002] 37.513810: 94,8 D R 7928 + 32 [dd]\n" + " <idle>-0 [011] 37.514362: 94,8 C R 7824 + 96 [0] \n" " <idle>-0 [014] 37.514728: 94,8 C R 7928 + 32 [0]\n" "\n" "That remaining split is cause by the test for:\n" - " \"dio->final_block_in_bio !=3D dio->cur_page_block\".\n" + " \"dio->final_block_in_bio != dio->cur_page_block\".\n" "As this was in the code for a long time I just assume it is right.\n" "\n" - "So eventually for the 64k request in the example this patch improves th=\n" - "e\n" + "So eventually for the 64k request in the example this patch improves the\n" "effective submissions that get to the block device layer from:\n" "10x4k, 1x8k, 1x16k to 1x48k & 1x16k which is much better.\n" "\n" - "Througput impact is small, but in terms of cpu consumption this is visi=\n" - "ble\n" + "Througput impact is small, but in terms of cpu consumption this is visible\n" "by a single digit percentage depending on the incoming request size.\n" "\n" - "The solution looking for comments or alternatives in this RFC patch add=\n" - "s a new\n" - "kiocb flag that let filesystems specify if they need these workaround t=\n" - "o\n" - "separate meta data reads - if not, like all pre-btrfs filesystems the d=\n" - "io code\n" + "The solution looking for comments or alternatives in this RFC patch adds a new\n" + "kiocb flag that let filesystems specify if they need these workaround to\n" + "separate meta data reads - if not, like all pre-btrfs filesystems the dio code\n" "doesn't have to cause this extra work.\n" "\n" "Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>\n" @@ -213,15 +146,14 @@ " #include \"compat.h\"\n" " #include \"ctree.h\"\n" " #include \"disk-io.h\"\n" - "@@ -5822,6 +5823,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kio=\n" - "cb *iocb,\n" + "@@ -5822,6 +5823,9 
@@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,\n" " \tfree_extent_state(cached_state);\n" - " \tcached_state =3D NULL;\n" - "=20\n" + " \tcached_state = NULL;\n" + " \n" "+\t/* btrfs cannot handle logically non-contiguous requests */\n" "+\tkiocb_set_separate_meta_reads(iocb);\n" "+\n" - " \tret =3D __blockdev_direct_IO(rw, iocb, inode,\n" + " \tret = __blockdev_direct_IO(rw, iocb, inode,\n" " \t\t BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,\n" " \t\t iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,\n" "diff --git a/fs/direct-io.c b/fs/direct-io.c\n" @@ -234,7 +166,7 @@ " #include <linux/uio.h>\n" "+#include <linux/aio.h>\n" " #include <asm/atomic.h>\n" - "=20\n" + " \n" " /*\n" "@@ -79,6 +80,7 @@ struct dio {\n" " \tsector_t final_block_in_request;/* doesn't change */\n" @@ -252,18 +184,17 @@ "+\t\telse if (dio->separate_meta_reads && dio->boundary)\n" " \t\t\tdio_bio_submit(dio);\n" " \t}\n" - "=20\n" - "@@ -1245,6 +1247,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb,=\n" - " struct inode *inode,\n" - " \tdio->is_async =3D !is_sync_kiocb(iocb) && !((rw & WRITE) &&\n" + " \n" + "@@ -1245,6 +1247,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,\n" + " \tdio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&\n" " \t\t(end > i_size_read(inode)));\n" - "=20\n" + " \n" "+\t/*\n" "+\t * some filesystems e.g. 
btrfs need to separate metadata read\n" "+\t */\n" - "+\tdio->separate_meta_reads =3D kiocb_needs_separate_meta_reads(iocb);\n" + "+\tdio->separate_meta_reads = kiocb_needs_separate_meta_reads(iocb);\n" "+\n" - " \tretval =3D direct_io_worker(rw, iocb, inode, iov, offset,\n" + " \tretval = direct_io_worker(rw, iocb, inode, iov, offset,\n" " \t\t\t\tnr_segs, blkbits, get_block, end_io,\n" " \t\t\t\tsubmit_io, dio);\n" "diff --git a/include/linux/aio.h b/include/linux/aio.h\n" @@ -276,28 +207,18 @@ " #define KIF_CANCELLED\t\t2\n" "+/* to separate meta reads */\n" "+#define KIF_SEPARATE_META\t3\n" - "=20\n" - " #define kiocbTryLock(iocb)\ttest_and_set_bit(KIF_LOCKED, &(iocb)->ki_fl=\n" - "ags)\n" - " #define kiocbTryKick(iocb)\ttest_and_set_bit(KIF_KICKED, &(iocb)->ki_fl=\n" - "ags)\n" + " \n" + " #define kiocbTryLock(iocb)\ttest_and_set_bit(KIF_LOCKED, &(iocb)->ki_flags)\n" + " #define kiocbTryKick(iocb)\ttest_and_set_bit(KIF_KICKED, &(iocb)->ki_flags)\n" "@@ -50,6 +52,9 @@ struct kioctx;\n" " #define kiocbIsKicked(iocb)\ttest_bit(KIF_KICKED, &(iocb)->ki_flags)\n" - " #define kiocbIsCancelled(iocb)\ttest_bit(KIF_CANCELLED, &(iocb)->ki_fla=\n" - "gs)\n" - "=20\n" - "+#define kiocb_set_separate_meta_reads(iocb)\tset_bit(KIF_SEPARATE_META,=\n" - " &(iocb)->ki_flags)\n" - "+#define kiocb_needs_separate_meta_reads(iocb)\t(KIF_SEPARATE_META & (io=\n" - "cb)->ki_flags)\n" + " #define kiocbIsCancelled(iocb)\ttest_bit(KIF_CANCELLED, &(iocb)->ki_flags)\n" + " \n" + "+#define kiocb_set_separate_meta_reads(iocb)\tset_bit(KIF_SEPARATE_META, &(iocb)->ki_flags)\n" + "+#define kiocb_needs_separate_meta_reads(iocb)\t(KIF_SEPARATE_META & (iocb)->ki_flags)\n" "+\n" " /* is there a better place to document function pointer methods? 
*/\n" " /**\n" - " * ki_retry\t-\tiocb forward progress callback\n" - "--\n" - "To unsubscribe from this list: send the line \"unsubscribe linux-btrfs\" =\n" - "in\n" - "the body of a message to majordomo@vger.kernel.org\n" - More majordomo info at http://vger.kernel.org/majordomo-info.html + " * ki_retry\t-\tiocb forward progress callback" -01083753601ed26b7b68b375393e417011fcfe8c17758fa800fe5f7f2c1bec6d +6f062b963b3f8735afa5235c7afebc3f0d004c968a9339b9b3d43b685e202104
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.