diff for duplicates of <1484701124.2029.9.camel@hpe.com> diff --git a/a/1.txt b/N1/1.txt index d0379bb..1be4c2b 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,39 +1,67 @@ -T24gVHVlLCAyMDE3LTAxLTE3IGF0IDE2OjU5ICswMTAwLCBKYW4gS2FyYSB3cm90ZToNCj4gT24g -RnJpIDEzLTAxLTE3IDE3OjIwOjA4LCBSb3NzIFp3aXNsZXIgd3JvdGU6DQogOg0KPiA+IC0gSWYg -SSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQgRGF2ZSBDaGlubmVyIHN1Z2dlc3RlZCB0 -aGF0DQo+ID4gd2UgY2hhbmdlIC0gSWYgSSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQg -RGF2ZSBDaGlubmVyDQo+ID4gc3VnZ2VzdGVkIHRoYXQgd2UgY2hhbmdlIMKgIERBWCBzbyB0aGF0 -IEkvTyB3b3VsZCB1c2UgY2FjaGVkIHN0b3Jlcw0KPiA+IGluc3RlYWQgb2YgdGhlIG5vbi10ZW1w -b3JhbCBzdG9yZXMgwqAgdGhhdCBpdCBjdXJyZW50bHkgdXNlcy7CoMKgV2UNCj4gPiB3b3VsZCB0 -aGVuIHRyYWNrIHBhZ2VzIHRoYXQgd2VyZSB3cml0dGVuIHRvIGJ5IERBWCBpbiB0aGUgcmFkaXgN -Cj4gPiB0cmVlIHNvIHRoYXQgdGhleSB3b3VsZCBiZSBmbHVzaGVkIGxhdGVyIGR1cmluZyDCoA0K -PiA+IGZzeW5jL21zeW5jLsKgwqBEb2VzIHRoaXMgc291bmQgbGlrZSBhIHdpbj/CoMKgQWxzbywg -YXNzdW1pbmcgdGhhdCB3ZQ0KPiA+IGNhbiBmaW5kIGEgc29sdXRpb24gZm9yIHBsYXRmb3JtcyB3 -aGVyZSB0aGUgcHJvY2Vzc29yIGNhY2hlIGlzIHBhcnQNCj4gPiBvZiB0aGUgQURSIHNhZmUgem9u -ZSAoYWJvdmUgdG9waWMpIHRoaXMgd291bGQgYmUgYSBjbGVhcg0KPiA+IGltcHJvdmVtZW50LCBt -b3ZpbmcgdXMgZnJvbSB1c2luZyBub24tdGVtcG9yYWwgc3RvcmVzIHRvIGZhc3Rlcg0KPiA+IGNh -Y2hlZCBzdG9yZXMgd2l0aCBubyBkb3duc2lkZS4NCj4gDQo+IEkgZ3Vlc3MgdGhpcyBuZWVkcyBt -ZWFzdXJlbWVudHMuIEJ1dCBpdCBpcyB3b3J0aCBhIHRyeS4NCg0KQnJhaW4gQm95bHN0b24gZGlk -IHNvbWUgbWVhc3VyZW1lbnQgYmVmb3JlLg0KaHR0cDovL29zcy5zZ2kuY29tL2FyY2hpdmVzL3hm -cy8yMDE2LTA4L21zZzAwMjM5Lmh0bWwNCg0KSSB1cGRhdGVkIGhpcyB0ZXN0IHByb2dyYW0gdG8g -c2tpcCBwbWVtX3BlcnNpc3QoKSBmb3IgdGhlIGNhY2hlZCBjb3B5DQpjYXNlLg0KDQrCoMKgwqDC -oMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBkc3QgPSBkc3RiYXNlOw0K -KyAjaWYgMA0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg -Lyogc2VlIG5vdGUgYWJvdmUgKi8NCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDC -oMKgwqDCoMKgwqDCoGlmIChtb2RlID09ICdjJykNCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg -wqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBwbWVtX3BlcnNpc3QoZHN0LCBk -c3Rzeik7DQorICNlbmRpZg0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqB9DQoNCkhl -cmUgYXJlIHNhbXBsZSBydW5zOg0KDQokIG51bWFjdGwgLU4wIHRpbWUgLXAgLi9tZW1jcHlwZXJm -IGMgL21udC9wbWVtMC9maWxlIDEwMDAwMDANCklORk86IGRzdCAweDdmMWQwMDAwMDAwMCBzcmMg -MHg2MDEyMDAgZHN0c3ogMjc1NjUwOTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAzLjI4DQp1c2VyIDMu -MjcNCnN5cyAwLjAwDQoNCiQgbnVtYWN0bCAtTjAgdGltZSAtcCAuL21lbWNweXBlcmYgbiAvbW50 -L3BtZW0wL2ZpbGUgMTAwMDAwMA0KSU5GTzogZHN0IDB4N2Y2MDgwMDAwMDAwIHNyYyAweDYwMTIw -MCBkc3RzeiAyNzU2NTA5Njk2IGNweXN6IDE2Mzg0DQpyZWFsIDEuMDENCnVzZXIgMS4wMQ0Kc3lz -IDAuMDANCg0KJCBudW1hY3RsIC1OMSB0aW1lIC1wIC4vbWVtY3B5cGVyZiBjIC9tbnQvcG1lbTAv -ZmlsZSAxMDAwMDAwDQpJTkZPOiBkc3QgMHg3ZmU5MDAwMDAwMDAgc3JjIDB4NjAxMjAwIGRzdHN6 -IDI3NTY1MDk2OTYgY3B5c3ogMTYzODQNCnJlYWwgNC4wNg0KdXNlciA0LjA2DQpzeXMgMC4wMA0K -DQokIG51bWFjdGwgLU4xIHRpbWUgLXAgLi9tZW1jcHlwZXJmIG4gL21udC9wbWVtMC9maWxlIDEw -MDAwMDANCklORk86IGRzdCAweDdmNzY0MDAwMDAwMCBzcmMgMHg2MDEyMDAgZHN0c3ogMjc1NjUw -OTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAxLjI3DQp1c2VyIDEuMjcNCnN5cyAwLjAwDQoNCkluIHRo -aXMgc2ltcGxlIHRlc3QsIHVzaW5nIG5vbi10ZW1wb3JhbCBjb3B5IGlzIHN0aWxsIGZhc3RlciB0 -aGFuIHVzaW5nDQpjYWNoZWQgY29weS4NCg0KVGhhbmtzLA0KLVRvc2hpDQoNCg== +On Tue, 2017-01-17 at 16:59 +0100, Jan Kara wrote: +> On Fri 13-01-17 17:20:08, Ross Zwisler wrote: + : +> > - If I recall correctly, at one point Dave Chinner suggested that +> > we change - If I recall correctly, at one point Dave Chinner +> > suggested that we change DAX so that I/O would use cached stores +> > instead of the non-temporal stores that it currently uses. We +> > would then track pages that were written to by DAX in the radix +> > tree so that they would be flushed later during +> > fsync/msync. Does this sound like a win? Also, assuming that we +> > can find a solution for platforms where the processor cache is part +> > of the ADR safe zone (above topic) this would be a clear +> > improvement, moving us from using non-temporal stores to faster +> > cached stores with no downside. +> +> I guess this needs measurements. But it is worth a try. + +Brain Boylston did some measurement before. +http://oss.sgi.com/archives/xfs/2016-08/msg00239.html + +I updated his test program to skip pmem_persist() for the cached copy +case. + + dst = dstbase; ++ #if 0 + /* see note above */ + if (mode == 'c') + pmem_persist(dst, dstsz); ++ #endif + } + +Here are sample runs: + +$ numactl -N0 time -p ./memcpyperf c /mnt/pmem0/file 1000000 +INFO: dst 0x7f1d00000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 3.28 +user 3.27 +sys 0.00 + +$ numactl -N0 time -p ./memcpyperf n /mnt/pmem0/file 1000000 +INFO: dst 0x7f6080000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 1.01 +user 1.01 +sys 0.00 + +$ numactl -N1 time -p ./memcpyperf c /mnt/pmem0/file 1000000 +INFO: dst 0x7fe900000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 4.06 +user 4.06 +sys 0.00 + +$ numactl -N1 time -p ./memcpyperf n /mnt/pmem0/file 1000000 +INFO: dst 0x7f7640000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 1.27 +user 1.27 +sys 0.00 + +In this simple test, using non-temporal copy is still faster than using +cached copy. + +Thanks, +-Toshi + +_______________________________________________ +Linux-nvdimm mailing list +Linux-nvdimm@lists.01.org +https://lists.01.org/mailman/listinfo/linux-nvdimm diff --git a/a/content_digest b/N1/content_digest index 0450e64..dcfff6c 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -5,51 +5,79 @@ "Date\0Wed, 18 Jan 2017 00:03:08 +0000\0" "To\0ross.zwisler@linux.intel.com <ross.zwisler@linux.intel.com>" " jack@suse.cz <jack@suse.cz>\0" - "Cc\0linux-mm@kvack.org <linux-mm@kvack.org>" - linux-nvdimm@lists.01.org <linux-nvdimm@lists.01.org> - linux-block@vger.kernel.org <linux-block@vger.kernel.org> + "Cc\0linux-block@vger.kernel.org <linux-block@vger.kernel.org>" + linux-mm@kvack.org <linux-mm@kvack.org> lsf-pc@lists.linux-foundation.org <lsf-pc@lists.linux-foundation.org> - " linux-fsdevel@vger.kernel.org <linux-fsdevel@vger.kernel.org>\0" + linux-fsdevel@vger.kernel.org <linux-fsdevel@vger.kernel.org> + " linux-nvdimm@lists.01.org <linux-nvdimm@lists.01.org>\0" "\00:1\0" "b\0" - "T24gVHVlLCAyMDE3LTAxLTE3IGF0IDE2OjU5ICswMTAwLCBKYW4gS2FyYSB3cm90ZToNCj4gT24g\n" - "RnJpIDEzLTAxLTE3IDE3OjIwOjA4LCBSb3NzIFp3aXNsZXIgd3JvdGU6DQogOg0KPiA+IC0gSWYg\n" - "SSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQgRGF2ZSBDaGlubmVyIHN1Z2dlc3RlZCB0\n" - "aGF0DQo+ID4gd2UgY2hhbmdlIC0gSWYgSSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQg\n" - "RGF2ZSBDaGlubmVyDQo+ID4gc3VnZ2VzdGVkIHRoYXQgd2UgY2hhbmdlIMKgIERBWCBzbyB0aGF0\n" - "IEkvTyB3b3VsZCB1c2UgY2FjaGVkIHN0b3Jlcw0KPiA+IGluc3RlYWQgb2YgdGhlIG5vbi10ZW1w\n" - "b3JhbCBzdG9yZXMgwqAgdGhhdCBpdCBjdXJyZW50bHkgdXNlcy7CoMKgV2UNCj4gPiB3b3VsZCB0\n" - "aGVuIHRyYWNrIHBhZ2VzIHRoYXQgd2VyZSB3cml0dGVuIHRvIGJ5IERBWCBpbiB0aGUgcmFkaXgN\n" - "Cj4gPiB0cmVlIHNvIHRoYXQgdGhleSB3b3VsZCBiZSBmbHVzaGVkIGxhdGVyIGR1cmluZyDCoA0K\n" - "PiA+IGZzeW5jL21zeW5jLsKgwqBEb2VzIHRoaXMgc291bmQgbGlrZSBhIHdpbj/CoMKgQWxzbywg\n" - "YXNzdW1pbmcgdGhhdCB3ZQ0KPiA+IGNhbiBmaW5kIGEgc29sdXRpb24gZm9yIHBsYXRmb3JtcyB3\n" - "aGVyZSB0aGUgcHJvY2Vzc29yIGNhY2hlIGlzIHBhcnQNCj4gPiBvZiB0aGUgQURSIHNhZmUgem9u\n" - "ZSAoYWJvdmUgdG9waWMpIHRoaXMgd291bGQgYmUgYSBjbGVhcg0KPiA+IGltcHJvdmVtZW50LCBt\n" - "b3ZpbmcgdXMgZnJvbSB1c2luZyBub24tdGVtcG9yYWwgc3RvcmVzIHRvIGZhc3Rlcg0KPiA+IGNh\n" - "Y2hlZCBzdG9yZXMgd2l0aCBubyBkb3duc2lkZS4NCj4gDQo+IEkgZ3Vlc3MgdGhpcyBuZWVkcyBt\n" - "ZWFzdXJlbWVudHMuIEJ1dCBpdCBpcyB3b3J0aCBhIHRyeS4NCg0KQnJhaW4gQm95bHN0b24gZGlk\n" - "IHNvbWUgbWVhc3VyZW1lbnQgYmVmb3JlLg0KaHR0cDovL29zcy5zZ2kuY29tL2FyY2hpdmVzL3hm\n" - "cy8yMDE2LTA4L21zZzAwMjM5Lmh0bWwNCg0KSSB1cGRhdGVkIGhpcyB0ZXN0IHByb2dyYW0gdG8g\n" - "c2tpcCBwbWVtX3BlcnNpc3QoKSBmb3IgdGhlIGNhY2hlZCBjb3B5DQpjYXNlLg0KDQrCoMKgwqDC\n" - "oMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBkc3QgPSBkc3RiYXNlOw0K\n" - "KyAjaWYgMA0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg\n" - "Lyogc2VlIG5vdGUgYWJvdmUgKi8NCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDC\n" - "oMKgwqDCoMKgwqDCoGlmIChtb2RlID09ICdjJykNCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg\n" - "wqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBwbWVtX3BlcnNpc3QoZHN0LCBk\n" - "c3Rzeik7DQorICNlbmRpZg0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqB9DQoNCkhl\n" - "cmUgYXJlIHNhbXBsZSBydW5zOg0KDQokIG51bWFjdGwgLU4wIHRpbWUgLXAgLi9tZW1jcHlwZXJm\n" - "IGMgL21udC9wbWVtMC9maWxlIDEwMDAwMDANCklORk86IGRzdCAweDdmMWQwMDAwMDAwMCBzcmMg\n" - "MHg2MDEyMDAgZHN0c3ogMjc1NjUwOTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAzLjI4DQp1c2VyIDMu\n" - "MjcNCnN5cyAwLjAwDQoNCiQgbnVtYWN0bCAtTjAgdGltZSAtcCAuL21lbWNweXBlcmYgbiAvbW50\n" - "L3BtZW0wL2ZpbGUgMTAwMDAwMA0KSU5GTzogZHN0IDB4N2Y2MDgwMDAwMDAwIHNyYyAweDYwMTIw\n" - "MCBkc3RzeiAyNzU2NTA5Njk2IGNweXN6IDE2Mzg0DQpyZWFsIDEuMDENCnVzZXIgMS4wMQ0Kc3lz\n" - "IDAuMDANCg0KJCBudW1hY3RsIC1OMSB0aW1lIC1wIC4vbWVtY3B5cGVyZiBjIC9tbnQvcG1lbTAv\n" - "ZmlsZSAxMDAwMDAwDQpJTkZPOiBkc3QgMHg3ZmU5MDAwMDAwMDAgc3JjIDB4NjAxMjAwIGRzdHN6\n" - "IDI3NTY1MDk2OTYgY3B5c3ogMTYzODQNCnJlYWwgNC4wNg0KdXNlciA0LjA2DQpzeXMgMC4wMA0K\n" - "DQokIG51bWFjdGwgLU4xIHRpbWUgLXAgLi9tZW1jcHlwZXJmIG4gL21udC9wbWVtMC9maWxlIDEw\n" - "MDAwMDANCklORk86IGRzdCAweDdmNzY0MDAwMDAwMCBzcmMgMHg2MDEyMDAgZHN0c3ogMjc1NjUw\n" - "OTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAxLjI3DQp1c2VyIDEuMjcNCnN5cyAwLjAwDQoNCkluIHRo\n" - "aXMgc2ltcGxlIHRlc3QsIHVzaW5nIG5vbi10ZW1wb3JhbCBjb3B5IGlzIHN0aWxsIGZhc3RlciB0\n" - aGFuIHVzaW5nDQpjYWNoZWQgY29weS4NCg0KVGhhbmtzLA0KLVRvc2hpDQoNCg== + "On Tue, 2017-01-17 at 16:59 +0100, Jan Kara wrote:\n" + "> On Fri 13-01-17 17:20:08, Ross Zwisler wrote:\n" + " :\n" + "> > - If I recall correctly, at one point Dave Chinner suggested that\n" + "> > we change - If I recall correctly, at one point Dave Chinner\n" + "> > suggested that we change \302\240 DAX so that I/O would use cached stores\n" + "> > instead of the non-temporal stores \302\240 that it currently uses.\302\240\302\240We\n" + "> > would then track pages that were written to by DAX in the radix\n" + "> > tree so that they would be flushed later during \302\240\n" + "> > fsync/msync.\302\240\302\240Does this sound like a win?\302\240\302\240Also, assuming that we\n" + "> > can find a solution for platforms where the processor cache is part\n" + "> > of the ADR safe zone (above topic) this would be a clear\n" + "> > improvement, moving us from using non-temporal stores to faster\n" + "> > cached stores with no downside.\n" + "> \n" + "> I guess this needs measurements. But it is worth a try.\n" + "\n" + "Brain Boylston did some measurement before.\n" + "http://oss.sgi.com/archives/xfs/2016-08/msg00239.html\n" + "\n" + "I updated his test program to skip pmem_persist() for the cached copy\n" + "case.\n" + "\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240dst = dstbase;\n" + "+ #if 0\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240/* see note above */\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240if (mode == 'c')\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240pmem_persist(dst, dstsz);\n" + "+ #endif\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240}\n" + "\n" + "Here are sample runs:\n" + "\n" + "$ numactl -N0 time -p ./memcpyperf c /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7f1d00000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 3.28\n" + "user 3.27\n" + "sys 0.00\n" + "\n" + "$ numactl -N0 time -p ./memcpyperf n /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7f6080000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 1.01\n" + "user 1.01\n" + "sys 0.00\n" + "\n" + "$ numactl -N1 time -p ./memcpyperf c /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7fe900000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 4.06\n" + "user 4.06\n" + "sys 0.00\n" + "\n" + "$ numactl -N1 time -p ./memcpyperf n /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7f7640000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 1.27\n" + "user 1.27\n" + "sys 0.00\n" + "\n" + "In this simple test, using non-temporal copy is still faster than using\n" + "cached copy.\n" + "\n" + "Thanks,\n" + "-Toshi\n" + "\n" + "_______________________________________________\n" + "Linux-nvdimm mailing list\n" + "Linux-nvdimm@lists.01.org\n" + https://lists.01.org/mailman/listinfo/linux-nvdimm -30e7c56ece40e243dd40b49892d31027f080b3adf319471274ccb17334cf5869 +3347b0cb462430c21e21e2b6fb23132f0f94c4e2e93706085e88b6e0633008ef
diff --git a/a/1.txt b/N2/1.txt index d0379bb..86b2889 100644 --- a/a/1.txt +++ b/N2/1.txt @@ -1,39 +1,62 @@ -T24gVHVlLCAyMDE3LTAxLTE3IGF0IDE2OjU5ICswMTAwLCBKYW4gS2FyYSB3cm90ZToNCj4gT24g -RnJpIDEzLTAxLTE3IDE3OjIwOjA4LCBSb3NzIFp3aXNsZXIgd3JvdGU6DQogOg0KPiA+IC0gSWYg -SSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQgRGF2ZSBDaGlubmVyIHN1Z2dlc3RlZCB0 -aGF0DQo+ID4gd2UgY2hhbmdlIC0gSWYgSSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQg -RGF2ZSBDaGlubmVyDQo+ID4gc3VnZ2VzdGVkIHRoYXQgd2UgY2hhbmdlIMKgIERBWCBzbyB0aGF0 -IEkvTyB3b3VsZCB1c2UgY2FjaGVkIHN0b3Jlcw0KPiA+IGluc3RlYWQgb2YgdGhlIG5vbi10ZW1w -b3JhbCBzdG9yZXMgwqAgdGhhdCBpdCBjdXJyZW50bHkgdXNlcy7CoMKgV2UNCj4gPiB3b3VsZCB0 -aGVuIHRyYWNrIHBhZ2VzIHRoYXQgd2VyZSB3cml0dGVuIHRvIGJ5IERBWCBpbiB0aGUgcmFkaXgN -Cj4gPiB0cmVlIHNvIHRoYXQgdGhleSB3b3VsZCBiZSBmbHVzaGVkIGxhdGVyIGR1cmluZyDCoA0K -PiA+IGZzeW5jL21zeW5jLsKgwqBEb2VzIHRoaXMgc291bmQgbGlrZSBhIHdpbj/CoMKgQWxzbywg -YXNzdW1pbmcgdGhhdCB3ZQ0KPiA+IGNhbiBmaW5kIGEgc29sdXRpb24gZm9yIHBsYXRmb3JtcyB3 -aGVyZSB0aGUgcHJvY2Vzc29yIGNhY2hlIGlzIHBhcnQNCj4gPiBvZiB0aGUgQURSIHNhZmUgem9u -ZSAoYWJvdmUgdG9waWMpIHRoaXMgd291bGQgYmUgYSBjbGVhcg0KPiA+IGltcHJvdmVtZW50LCBt -b3ZpbmcgdXMgZnJvbSB1c2luZyBub24tdGVtcG9yYWwgc3RvcmVzIHRvIGZhc3Rlcg0KPiA+IGNh -Y2hlZCBzdG9yZXMgd2l0aCBubyBkb3duc2lkZS4NCj4gDQo+IEkgZ3Vlc3MgdGhpcyBuZWVkcyBt -ZWFzdXJlbWVudHMuIEJ1dCBpdCBpcyB3b3J0aCBhIHRyeS4NCg0KQnJhaW4gQm95bHN0b24gZGlk -IHNvbWUgbWVhc3VyZW1lbnQgYmVmb3JlLg0KaHR0cDovL29zcy5zZ2kuY29tL2FyY2hpdmVzL3hm -cy8yMDE2LTA4L21zZzAwMjM5Lmh0bWwNCg0KSSB1cGRhdGVkIGhpcyB0ZXN0IHByb2dyYW0gdG8g -c2tpcCBwbWVtX3BlcnNpc3QoKSBmb3IgdGhlIGNhY2hlZCBjb3B5DQpjYXNlLg0KDQrCoMKgwqDC -oMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBkc3QgPSBkc3RiYXNlOw0K -KyAjaWYgMA0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg -Lyogc2VlIG5vdGUgYWJvdmUgKi8NCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDC -oMKgwqDCoMKgwqDCoGlmIChtb2RlID09ICdjJykNCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg -wqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBwbWVtX3BlcnNpc3QoZHN0LCBk -c3Rzeik7DQorICNlbmRpZg0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqB9DQoNCkhl -cmUgYXJlIHNhbXBsZSBydW5zOg0KDQokIG51bWFjdGwgLU4wIHRpbWUgLXAgLi9tZW1jcHlwZXJm -IGMgL21udC9wbWVtMC9maWxlIDEwMDAwMDANCklORk86IGRzdCAweDdmMWQwMDAwMDAwMCBzcmMg -MHg2MDEyMDAgZHN0c3ogMjc1NjUwOTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAzLjI4DQp1c2VyIDMu -MjcNCnN5cyAwLjAwDQoNCiQgbnVtYWN0bCAtTjAgdGltZSAtcCAuL21lbWNweXBlcmYgbiAvbW50 -L3BtZW0wL2ZpbGUgMTAwMDAwMA0KSU5GTzogZHN0IDB4N2Y2MDgwMDAwMDAwIHNyYyAweDYwMTIw -MCBkc3RzeiAyNzU2NTA5Njk2IGNweXN6IDE2Mzg0DQpyZWFsIDEuMDENCnVzZXIgMS4wMQ0Kc3lz -IDAuMDANCg0KJCBudW1hY3RsIC1OMSB0aW1lIC1wIC4vbWVtY3B5cGVyZiBjIC9tbnQvcG1lbTAv -ZmlsZSAxMDAwMDAwDQpJTkZPOiBkc3QgMHg3ZmU5MDAwMDAwMDAgc3JjIDB4NjAxMjAwIGRzdHN6 -IDI3NTY1MDk2OTYgY3B5c3ogMTYzODQNCnJlYWwgNC4wNg0KdXNlciA0LjA2DQpzeXMgMC4wMA0K -DQokIG51bWFjdGwgLU4xIHRpbWUgLXAgLi9tZW1jcHlwZXJmIG4gL21udC9wbWVtMC9maWxlIDEw -MDAwMDANCklORk86IGRzdCAweDdmNzY0MDAwMDAwMCBzcmMgMHg2MDEyMDAgZHN0c3ogMjc1NjUw -OTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAxLjI3DQp1c2VyIDEuMjcNCnN5cyAwLjAwDQoNCkluIHRo -aXMgc2ltcGxlIHRlc3QsIHVzaW5nIG5vbi10ZW1wb3JhbCBjb3B5IGlzIHN0aWxsIGZhc3RlciB0 -aGFuIHVzaW5nDQpjYWNoZWQgY29weS4NCg0KVGhhbmtzLA0KLVRvc2hpDQoNCg== +On Tue, 2017-01-17 at 16:59 +0100, Jan Kara wrote: +> On Fri 13-01-17 17:20:08, Ross Zwisler wrote: + : +> > - If I recall correctly, at one point Dave Chinner suggested that +> > we change - If I recall correctly, at one point Dave Chinner +> > suggested that we change DAX so that I/O would use cached stores +> > instead of the non-temporal stores that it currently uses. We +> > would then track pages that were written to by DAX in the radix +> > tree so that they would be flushed later during +> > fsync/msync. Does this sound like a win? Also, assuming that we +> > can find a solution for platforms where the processor cache is part +> > of the ADR safe zone (above topic) this would be a clear +> > improvement, moving us from using non-temporal stores to faster +> > cached stores with no downside. +> +> I guess this needs measurements. But it is worth a try. + +Brain Boylston did some measurement before. +http://oss.sgi.com/archives/xfs/2016-08/msg00239.html + +I updated his test program to skip pmem_persist() for the cached copy +case. + + dst = dstbase; ++ #if 0 + /* see note above */ + if (mode == 'c') + pmem_persist(dst, dstsz); ++ #endif + } + +Here are sample runs: + +$ numactl -N0 time -p ./memcpyperf c /mnt/pmem0/file 1000000 +INFO: dst 0x7f1d00000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 3.28 +user 3.27 +sys 0.00 + +$ numactl -N0 time -p ./memcpyperf n /mnt/pmem0/file 1000000 +INFO: dst 0x7f6080000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 1.01 +user 1.01 +sys 0.00 + +$ numactl -N1 time -p ./memcpyperf c /mnt/pmem0/file 1000000 +INFO: dst 0x7fe900000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 4.06 +user 4.06 +sys 0.00 + +$ numactl -N1 time -p ./memcpyperf n /mnt/pmem0/file 1000000 +INFO: dst 0x7f7640000000 src 0x601200 dstsz 2756509696 cpysz 16384 +real 1.27 +user 1.27 +sys 0.00 + +In this simple test, using non-temporal copy is still faster than using +cached copy. + +Thanks, +-Toshi diff --git a/a/content_digest b/N2/content_digest index 0450e64..164af85 100644 --- a/a/content_digest +++ b/N2/content_digest @@ -12,44 +12,67 @@ " linux-fsdevel@vger.kernel.org <linux-fsdevel@vger.kernel.org>\0" "\00:1\0" "b\0" - "T24gVHVlLCAyMDE3LTAxLTE3IGF0IDE2OjU5ICswMTAwLCBKYW4gS2FyYSB3cm90ZToNCj4gT24g\n" - "RnJpIDEzLTAxLTE3IDE3OjIwOjA4LCBSb3NzIFp3aXNsZXIgd3JvdGU6DQogOg0KPiA+IC0gSWYg\n" - "SSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQgRGF2ZSBDaGlubmVyIHN1Z2dlc3RlZCB0\n" - "aGF0DQo+ID4gd2UgY2hhbmdlIC0gSWYgSSByZWNhbGwgY29ycmVjdGx5LCBhdCBvbmUgcG9pbnQg\n" - "RGF2ZSBDaGlubmVyDQo+ID4gc3VnZ2VzdGVkIHRoYXQgd2UgY2hhbmdlIMKgIERBWCBzbyB0aGF0\n" - "IEkvTyB3b3VsZCB1c2UgY2FjaGVkIHN0b3Jlcw0KPiA+IGluc3RlYWQgb2YgdGhlIG5vbi10ZW1w\n" - "b3JhbCBzdG9yZXMgwqAgdGhhdCBpdCBjdXJyZW50bHkgdXNlcy7CoMKgV2UNCj4gPiB3b3VsZCB0\n" - "aGVuIHRyYWNrIHBhZ2VzIHRoYXQgd2VyZSB3cml0dGVuIHRvIGJ5IERBWCBpbiB0aGUgcmFkaXgN\n" - "Cj4gPiB0cmVlIHNvIHRoYXQgdGhleSB3b3VsZCBiZSBmbHVzaGVkIGxhdGVyIGR1cmluZyDCoA0K\n" - "PiA+IGZzeW5jL21zeW5jLsKgwqBEb2VzIHRoaXMgc291bmQgbGlrZSBhIHdpbj/CoMKgQWxzbywg\n" - "YXNzdW1pbmcgdGhhdCB3ZQ0KPiA+IGNhbiBmaW5kIGEgc29sdXRpb24gZm9yIHBsYXRmb3JtcyB3\n" - "aGVyZSB0aGUgcHJvY2Vzc29yIGNhY2hlIGlzIHBhcnQNCj4gPiBvZiB0aGUgQURSIHNhZmUgem9u\n" - "ZSAoYWJvdmUgdG9waWMpIHRoaXMgd291bGQgYmUgYSBjbGVhcg0KPiA+IGltcHJvdmVtZW50LCBt\n" - "b3ZpbmcgdXMgZnJvbSB1c2luZyBub24tdGVtcG9yYWwgc3RvcmVzIHRvIGZhc3Rlcg0KPiA+IGNh\n" - "Y2hlZCBzdG9yZXMgd2l0aCBubyBkb3duc2lkZS4NCj4gDQo+IEkgZ3Vlc3MgdGhpcyBuZWVkcyBt\n" - "ZWFzdXJlbWVudHMuIEJ1dCBpdCBpcyB3b3J0aCBhIHRyeS4NCg0KQnJhaW4gQm95bHN0b24gZGlk\n" - "IHNvbWUgbWVhc3VyZW1lbnQgYmVmb3JlLg0KaHR0cDovL29zcy5zZ2kuY29tL2FyY2hpdmVzL3hm\n" - "cy8yMDE2LTA4L21zZzAwMjM5Lmh0bWwNCg0KSSB1cGRhdGVkIGhpcyB0ZXN0IHByb2dyYW0gdG8g\n" - "c2tpcCBwbWVtX3BlcnNpc3QoKSBmb3IgdGhlIGNhY2hlZCBjb3B5DQpjYXNlLg0KDQrCoMKgwqDC\n" - "oMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBkc3QgPSBkc3RiYXNlOw0K\n" - "KyAjaWYgMA0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg\n" - "Lyogc2VlIG5vdGUgYWJvdmUgKi8NCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDC\n" - "oMKgwqDCoMKgwqDCoGlmIChtb2RlID09ICdjJykNCsKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKg\n" - "wqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqBwbWVtX3BlcnNpc3QoZHN0LCBk\n" - "c3Rzeik7DQorICNlbmRpZg0KwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqB9DQoNCkhl\n" - "cmUgYXJlIHNhbXBsZSBydW5zOg0KDQokIG51bWFjdGwgLU4wIHRpbWUgLXAgLi9tZW1jcHlwZXJm\n" - "IGMgL21udC9wbWVtMC9maWxlIDEwMDAwMDANCklORk86IGRzdCAweDdmMWQwMDAwMDAwMCBzcmMg\n" - "MHg2MDEyMDAgZHN0c3ogMjc1NjUwOTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAzLjI4DQp1c2VyIDMu\n" - "MjcNCnN5cyAwLjAwDQoNCiQgbnVtYWN0bCAtTjAgdGltZSAtcCAuL21lbWNweXBlcmYgbiAvbW50\n" - "L3BtZW0wL2ZpbGUgMTAwMDAwMA0KSU5GTzogZHN0IDB4N2Y2MDgwMDAwMDAwIHNyYyAweDYwMTIw\n" - "MCBkc3RzeiAyNzU2NTA5Njk2IGNweXN6IDE2Mzg0DQpyZWFsIDEuMDENCnVzZXIgMS4wMQ0Kc3lz\n" - "IDAuMDANCg0KJCBudW1hY3RsIC1OMSB0aW1lIC1wIC4vbWVtY3B5cGVyZiBjIC9tbnQvcG1lbTAv\n" - "ZmlsZSAxMDAwMDAwDQpJTkZPOiBkc3QgMHg3ZmU5MDAwMDAwMDAgc3JjIDB4NjAxMjAwIGRzdHN6\n" - "IDI3NTY1MDk2OTYgY3B5c3ogMTYzODQNCnJlYWwgNC4wNg0KdXNlciA0LjA2DQpzeXMgMC4wMA0K\n" - "DQokIG51bWFjdGwgLU4xIHRpbWUgLXAgLi9tZW1jcHlwZXJmIG4gL21udC9wbWVtMC9maWxlIDEw\n" - "MDAwMDANCklORk86IGRzdCAweDdmNzY0MDAwMDAwMCBzcmMgMHg2MDEyMDAgZHN0c3ogMjc1NjUw\n" - "OTY5NiBjcHlzeiAxNjM4NA0KcmVhbCAxLjI3DQp1c2VyIDEuMjcNCnN5cyAwLjAwDQoNCkluIHRo\n" - "aXMgc2ltcGxlIHRlc3QsIHVzaW5nIG5vbi10ZW1wb3JhbCBjb3B5IGlzIHN0aWxsIGZhc3RlciB0\n" - aGFuIHVzaW5nDQpjYWNoZWQgY29weS4NCg0KVGhhbmtzLA0KLVRvc2hpDQoNCg== + "On Tue, 2017-01-17 at 16:59 +0100, Jan Kara wrote:\n" + "> On Fri 13-01-17 17:20:08, Ross Zwisler wrote:\n" + " :\n" + "> > - If I recall correctly, at one point Dave Chinner suggested that\n" + "> > we change - If I recall correctly, at one point Dave Chinner\n" + "> > suggested that we change \302\240 DAX so that I/O would use cached stores\n" + "> > instead of the non-temporal stores \302\240 that it currently uses.\302\240\302\240We\n" + "> > would then track pages that were written to by DAX in the radix\n" + "> > tree so that they would be flushed later during \302\240\n" + "> > fsync/msync.\302\240\302\240Does this sound like a win?\302\240\302\240Also, assuming that we\n" + "> > can find a solution for platforms where the processor cache is part\n" + "> > of the ADR safe zone (above topic) this would be a clear\n" + "> > improvement, moving us from using non-temporal stores to faster\n" + "> > cached stores with no downside.\n" + "> \n" + "> I guess this needs measurements. But it is worth a try.\n" + "\n" + "Brain Boylston did some measurement before.\n" + "http://oss.sgi.com/archives/xfs/2016-08/msg00239.html\n" + "\n" + "I updated his test program to skip pmem_persist() for the cached copy\n" + "case.\n" + "\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240dst = dstbase;\n" + "+ #if 0\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240/* see note above */\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240if (mode == 'c')\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240pmem_persist(dst, dstsz);\n" + "+ #endif\n" + "\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240\302\240}\n" + "\n" + "Here are sample runs:\n" + "\n" + "$ numactl -N0 time -p ./memcpyperf c /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7f1d00000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 3.28\n" + "user 3.27\n" + "sys 0.00\n" + "\n" + "$ numactl -N0 time -p ./memcpyperf n /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7f6080000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 1.01\n" + "user 1.01\n" + "sys 0.00\n" + "\n" + "$ numactl -N1 time -p ./memcpyperf c /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7fe900000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 4.06\n" + "user 4.06\n" + "sys 0.00\n" + "\n" + "$ numactl -N1 time -p ./memcpyperf n /mnt/pmem0/file 1000000\n" + "INFO: dst 0x7f7640000000 src 0x601200 dstsz 2756509696 cpysz 16384\n" + "real 1.27\n" + "user 1.27\n" + "sys 0.00\n" + "\n" + "In this simple test, using non-temporal copy is still faster than using\n" + "cached copy.\n" + "\n" + "Thanks,\n" + -Toshi -30e7c56ece40e243dd40b49892d31027f080b3adf319471274ccb17334cf5869 +a16bd5bfe163bf84ce2cd76e1a2b8ef8938b58019cbc4b9ba3c6583359e5b0a9
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.