From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Thu, 8 May 2014 14:58:14 +0530 Message-ID: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Return-path: Sender: owner-linux-mm@kvack.org To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com, Madhavan Srinivasan List-Id: linux-arch.vger.kernel.org Commit 8c6e50b029 by Kirill A. Shutemov introduced vm_ops->map_pages() for mapping easily accessible pages around the fault address, in the hope of reducing the number of minor page faults. This patchset creates infrastructure to modify the FAULT_AROUND_ORDER value using mm/Kconfig. This will enable architecture maintainers to decide on a suitable FAULT_AROUND_ORDER value based on performance data for that architecture. The first patch also defaults the FAULT_AROUND_ORDER Kconfig element to 4. The second patch lists the performance numbers for powerpc (platform pseries) and initializes the fault-around order variable for the pseries platform of powerpc. V4 Changes: Replaced the BUILD_BUG_ON with VM_BUG_ON. Moved fault_around_pages() and fault_around_mask() functions outside of #ifdef CONFIG_DEBUG_FS. V3 Changes: Replaced the FAULT_AROUND_ORDER macro with a variable to support arches that have sub-platforms. Made changes in commit messages. V2 Changes: Created Kconfig parameter for FAULT_AROUND_ORDER. Added check in do_read_fault to handle a FAULT_AROUND_ORDER value of 0. Made changes in commit messages. 
Madhavan Srinivasan (2): mm: move FAULT_AROUND_ORDER to arch/ powerpc/pseries: init fault_around_order for pseries arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 5 +++++ mm/Kconfig | 8 ++++++++ mm/memory.c | 25 ++++++------------------- 4 files changed, 21 insertions(+), 19 deletions(-) -- 1.7.10.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: [PATCH V4 1/2] mm: move FAULT_AROUND_ORDER to arch/ Date: Thu, 8 May 2014 14:58:15 +0530 Message-ID: <1399541296-18810-2-git-send-email-maddy@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Return-path: In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Sender: owner-linux-mm@kvack.org To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com, Madhavan Srinivasan List-Id: linux-arch.vger.kernel.org Kirill A. Shutemov with 8c6e50b029 commit introduced vm_ops->map_pages() for mapping easy accessible pages around fault address in hope to reduce number of minor page faults. This patch creates infrastructure to modify the FAULT_AROUND_ORDER value using mm/Kconfig. This will enable architecture maintainers to decide on suitable FAULT_AROUND_ORDER value based on performance data for that architecture. Patch also defaults FAULT_AROUND_ORDER Kconfig element to 4. 
Signed-off-by: Madhavan Srinivasan --- mm/Kconfig | 8 ++++++++ mm/memory.c | 25 ++++++------------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index ebe5880..c7fc4f1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -176,6 +176,14 @@ config MOVABLE_NODE config HAVE_BOOTMEM_INFO_NODE def_bool n +# +# Fault around order is a control knob to decide the fault around pages. +# Default value is set to 4 , but the arch can override it as desired. +# +config FAULT_AROUND_ORDER + int + default 4 + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" diff --git a/mm/memory.c b/mm/memory.c index 037b812..e3931ef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,11 +3402,9 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +unsigned int fault_around_order __read_mostly = CONFIG_FAULT_AROUND_ORDER; #ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; - static int fault_around_order_get(void *data, u64 *val) { *val = fault_around_order; @@ -3415,7 +3413,6 @@ static int fault_around_order_get(void *data, u64 *val) static int fault_around_order_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); if (1UL << val > PTRS_PER_PTE) return -EINVAL; fault_around_order = val; @@ -3435,31 +3432,21 @@ static int __init fault_around_debugfs(void) return 0; } late_initcall(fault_around_debugfs); +#endif static inline unsigned long fault_around_pages(void) { - return 1UL << fault_around_order; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); -} -#else -static inline unsigned long fault_around_pages(void) -{ unsigned long nr_pages; - nr_pages = 1UL << FAULT_AROUND_ORDER; - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); + nr_pages = 1UL << fault_around_order; + 
VM_BUG_ON(nr_pages > PTRS_PER_PTE); return nr_pages; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); + return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); } -#endif static void do_fault_around(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pgoff_t pgoff, unsigned int flags) @@ -3515,7 +3502,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if ((vma->vm_ops->map_pages) && fault_around_order) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- 1.7.10.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries Date: Thu, 8 May 2014 14:58:16 +0530 Message-ID: <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Return-path: In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Sender: owner-linux-mm@kvack.org To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com, Madhavan Srinivasan List-Id: linux-arch.vger.kernel.org Performance data for different FAULT_AROUND_ORDER values from 4 socket Power7 system (128 Threads and 128GB memory). 
perf stat with repeat of 5 is used to get the stddev values. Test ran in v3.14 kernel (Baseline) and v3.15-rc1 for different fault around order values. %change here is calculated in this method ((new value - baseline)/baseline). And negative %change says its a drop in time. FAULT_AROUND_ORDER Baseline 1 3 4 5 8 Linux build (make -j64) minor-faults 47,437,359 35,279,286 25,425,347 23,461,275 22,002,189 21,435,836 times in seconds 347.302528420 344.061588460 340.974022391 348.193508116 348.673900158 350.986543618 stddev for time ( +- 1.50% ) ( +- 0.73% ) ( +- 1.13% ) ( +- 1.01% ) ( +- 1.89% ) ( +- 1.55% ) %chg time to baseline -0.9% -1.8% 0.2% 0.39% 1.06% Linux rebuild (make -j64) minor-faults 941,552 718,319 486,625 440,124 410,510 397,416 times in seconds 30.569834718 31.219637539 31.319370649 31.434285472 31.972367174 31.443043580 stddev for time ( +- 1.07% ) ( +- 0.13% ) ( +- 0.43% ) ( +- 0.18% ) ( +- 0.95% ) ( +- 0.58% ) %chg time to baseline 2.1% 2.4% 2.8% 4.58% 2.85% Binutils build (make all -j64 ) minor-faults 474,821 371,380 269,463 247,715 235,255 228,337 times in seconds 53.882492432 53.584289348 53.882773216 53.755816431 53.607824348 53.423759642 stddev for time ( +- 0.08% ) ( +- 0.56% ) ( +- 0.17% ) ( +- 0.11% ) ( +- 0.60% ) ( +- 0.69% ) %chg time to baseline -0.55% 0.0% -0.23% -0.51% -0.85% Two synthetic tests: access every word in file in sequential/random order. 
Sequential access 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 263,148 131,166 32,908 16,514 8,260 1,093 times in seconds 53.091138345 53.113191672 53.188776177 53.233017218 53.206841347 53.429979442 stddev for time ( +- 0.06% ) ( +- 0.07% ) ( +- 0.08% ) ( +- 0.09% ) ( +- 0.03% ) ( +- 0.03% ) %chg time to baseline 0.04% 0.18% 0.26% 0.21% 0.63% 8 threads minor-faults 2,097,267 1,048,753 262,237 131,397 65,621 8,274 times in seconds 55.173790028 54.591880790 54.824623287 54.802162211 54.969680503 54.790387715 stddev for time ( +- 0.78% ) ( +- 0.09% ) ( +- 0.08% ) ( +- 0.07% ) ( +- 0.28% ) ( +- 0.05% ) %chg time to baseline -1.05% -0.63% -0.67% -0.36% -0.69% 32 threads minor-faults 8,388,751 4,195,621 1,049,664 525,461 262,535 32,924 times in seconds 60.431573046 60.669110744 60.485336388 60.697789706 60.077959564 60.588855032 stddev for time ( +- 0.44% ) ( +- 0.27% ) ( +- 0.46% ) ( +- 0.67% ) ( +- 0.31% ) ( +- 0.49% ) %chg time to baseline 0.39% 0.08% 0.44% -0.58% 0.25% 64 threads minor-faults 16,777,409 8,607,527 2,289,766 1,202,264 598,405 67,587 times in seconds 96.932617720 100.675418760 102.109880836 103.881733383 102.580199555 105.751194041 stddev for time ( +- 1.39% ) ( +- 1.06% ) ( +- 0.99% ) ( +- 0.76% ) ( +- 1.65% ) ( +- 1.60% ) %chg time to baseline 3.86% 5.34% 7.16% 5.82% 9.09% 128 threads minor-faults 33,554,705 17,375,375 4,682,462 2,337,245 1,179,007 134,819 times in seconds 128.766704495 115.659225437 120.353046307 115.291871270 115.450886036 113.991902150 stddev for time ( +- 2.93% ) ( +- 0.30% ) ( +- 2.93% ) ( +- 1.24% ) ( +- 1.03% ) ( +- 0.70% ) %chg time to baseline -10.17% -6.53% -10.46% -10.34% -11.47% Random access 1GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 17,155 8,678 2,126 1,097 581 134 times in seconds 51.904430523 51.658017987 51.919270792 51.560531738 52.354431597 51.976469502 stddev for time ( +- 3.19% ) ( +- 1.35% ) ( +- 1.56% ) ( +- 0.91% ) ( +- 1.70% ) ( +- 2.02% ) %chg time to 
baseline -0.47% 0.02% -0.66% 0.86% 0.13% 8 threads minor-faults 131,844 70,705 17,457 8,505 4,251 598 times in seconds 58.162813956 54.991706305 54.952675791 55.323057492 54.755587379 53.376722828 stddev for time ( +- 1.44% ) ( +- 0.69% ) ( +- 1.23% ) ( +- 2.78% ) ( +- 1.90% ) ( +- 2.91% ) %chg time to baseline -5.45% -5.52% -4.88% -5.86% -8.22% 32 threads minor-faults 524,437 270,760 67,069 33,414 16,641 2,204 times in seconds 69.981777072 76.539570015 79.753578505 76.245943618 77.254258344 79.072596831 stddev for time ( +- 2.81% ) ( +- 1.95% ) ( +- 2.66% ) ( +- 0.99% ) ( +- 2.35% ) ( +- 3.22% ) %chg time to baseline 9.37% 13.96% 8.95% 10.39% 12.98% 64 threads minor-faults 1,049,117 527,451 134,016 66,638 33,391 4,559 times in seconds 108.024517536 117.575067996 115.322659914 111.943998437 115.049450815 119.218450840 stddev for time ( +- 2.40% ) ( +- 1.77% ) ( +- 1.19% ) ( +- 3.29% ) ( +- 2.32% ) ( +- 1.42% ) %chg time to baseline 8.84% 6.75% 3.62% 6.5% 10.3% 128 threads minor-faults 2,097,440 1,054,360 267,042 133,328 66,532 8,652 times in seconds 155.055861167 153.059625968 152.449492156 151.024005282 150.844647770 155.954366718 stddev for time ( +- 1.32% ) ( +- 1.14% ) ( +- 1.32% ) ( +- 0.81% ) ( +- 0.75% ) ( +- 0.72% ) %chg time to baseline -1.28% -1.68% -2.59% -2.71% 0.57% In the case of the kernel build, fault-around order (fao) values of 1 and 3 win when compared to 4 (but the data is a bit noisy). In the case of the kernel rebuild, a slowdown is seen for fao > 0. In the case of the synthetic tests, there are sporadic gains, but mostly slowdowns. No clear sweet-spot fao value can be suggested for ppc64/pseries with the current performance data. Hence, this patch sets fao to zero. Worst-case scenario: we touch one page every 16M to demonstrate the overhead. 
Touch only one page in page table in 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 1,104 1,090 1,071 1,068 1,065 1,063 times in seconds 0.006583298 0.008531502 0.019733795 0.036033763 0.062300553 0.406857086 stddev for time ( +- 2.79% ) ( +- 2.42% ) ( +- 3.47% ) ( +- 2.81% ) ( +- 2.01% ) ( +- 1.33% ) 8 threads minor-faults 8,279 8,264 8,245 8,243 8,239 8,240 times in seconds 0.044572398 0.057211811 0.107606306 0.205626815 0.381679120 2.647979955 stddev for time ( +- 1.95% ) ( +- 2.98% ) ( +- 1.74% ) ( +- 2.80% ) ( +- 2.01% ) ( +- 1.86% ) 32 threads minor-faults 32,879 32,864 32,849 32,845 32,839 32,843 times in seconds 0.197659343 0.218486087 0.445116407 0.694235883 1.296894038 9.127517045 stddev for time ( +- 3.05% ) ( +- 3.05% ) ( +- 4.33% ) ( +- 3.08% ) ( +- 3.75% ) ( +- 0.56% ) 64 threads minor-faults 65,680 65,664 65,646 65,645 65,640 65,647 times in seconds 0.455537304 0.489688780 0.866490093 1.427393118 2.379628982 17.059295051 stddev for time ( +- 4.01% ) ( +- 4.13% ) ( +- 2.92% ) ( +- 1.68% ) ( +- 1.79% ) ( +- 0.48% ) 128 threads minor-faults 131,279 131,265 131,250 131,245 131,241 131,254 times in seconds 1.026880651 1.095327536 1.721728274 2.808233068 4.662729948 31.732848290 stddev for time ( +- 6.85% ) ( +- 4.09% ) ( +- 1.71% ) ( +- 3.45% ) ( +- 2.40% ) ( +- 0.68% ) Signed-off-by: Madhavan Srinivasan --- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 9921953..6e6c993 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -17,6 +17,8 @@ struct device_node; extern void request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name); +extern unsigned int fault_around_order; + #include extern void __init fw_hypertas_feature_init(const char *hypertas, diff --git 
a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 2db8cc6..4391c3c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -465,6 +465,11 @@ static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* + * Defaulting to zero since no sweet spot value found in the performance test. + */ + fault_around_order = 0; + /* Discover PIC type and setup ppc_md accordingly */ pseries_discover_pic(); -- 1.7.10.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Thu, 15 May 2014 13:55:11 +0530 Message-ID: <537479E7.90806@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Sender: owner-linux-mm@kvack.org To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org Hi Ingo, Do you have any comments for the latest version of the patchset. If not, kindly can you pick it up as is. With regards Maddy > Kirill A. Shutemov with 8c6e50b029 commit introduced > vm_ops->map_pages() for mapping easy accessible pages around > fault address in hope to reduce number of minor page faults. 
> > This patch creates infrastructure to modify the FAULT_AROUND_ORDER > value using mm/Kconfig. This will enable architecture maintainers > to decide on suitable FAULT_AROUND_ORDER value based on > performance data for that architecture. First patch also defaults > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > out the performance numbers for powerpc (platform pseries) and > initialize the fault around order variable for pseries platform of > powerpc. > > V4 Changes: > Replaced the BUILD_BUG_ON with VM_BUG_ON. > Moved fault_around_pages() and fault_around_mask() functions outside of > #ifdef CONFIG_DEBUG_FS. > > V3 Changes: > Replaced FAULT_AROUND_ORDER macro to a variable to support arch's that > supports sub platforms. > Made changes in commit messages. > > V2 Changes: > Created Kconfig parameter for FAULT_AROUND_ORDER > Added check in do_read_fault to handle FAULT_AROUND_ORDER value of 0 > Made changes in commit messages. > > Madhavan Srinivasan (2): > mm: move FAULT_AROUND_ORDER to arch/ > powerpc/pseries: init fault_around_order for pseries > > arch/powerpc/platforms/pseries/pseries.h | 2 ++ > arch/powerpc/platforms/pseries/setup.c | 5 +++++ > mm/Kconfig | 8 ++++++++ > mm/memory.c | 25 ++++++------------------- > 4 files changed, 21 insertions(+), 19 deletions(-) > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Thu, 15 May 2014 10:28:17 -0700 (PDT) Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Return-path: In-Reply-To: <537479E7.90806@linux.vnet.ibm.com> Sender: owner-linux-mm@kvack.org To: Madhavan Srinivasan Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org On Thu, 15 May 2014, Madhavan Srinivasan wrote: > > Hi Ingo, > > Do you have any comments for the latest version of the patchset. If > not, kindly can you pick it up as is. > > > With regards > Maddy > > > Kirill A. Shutemov with 8c6e50b029 commit introduced > > vm_ops->map_pages() for mapping easy accessible pages around > > fault address in hope to reduce number of minor page faults. > > > > This patch creates infrastructure to modify the FAULT_AROUND_ORDER > > value using mm/Kconfig. This will enable architecture maintainers > > to decide on suitable FAULT_AROUND_ORDER value based on > > performance data for that architecture. First patch also defaults > > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > > out the performance numbers for powerpc (platform pseries) and > > initialize the fault around order variable for pseries platform of > > powerpc. Sorry for not commenting earlier - just reminded by this ping to Ingo. I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. 
arch/powerpc/Kconfig suggests that Power supports base page size of 4k, 16k, 64k or 256k. I would expect your optimal fault_around_order to depend very much on the base page size. Perhaps fault_around_size would provide a more useful default? Hugh -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Mon, 19 May 2014 09:42:46 +0930 Message-ID: <87wqdik4n5.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain Return-path: In-Reply-To: Sender: owner-linux-mm@kvack.org To: Hugh Dickins , Madhavan Srinivasan Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org Hugh Dickins writes: > On Thu, 15 May 2014, Madhavan Srinivasan wrote: >> >> Hi Ingo, >> >> Do you have any comments for the latest version of the patchset. If >> not, kindly can you pick it up as is. >> >> >> With regards >> Maddy >> >> > Kirill A. Shutemov with 8c6e50b029 commit introduced >> > vm_ops->map_pages() for mapping easy accessible pages around >> > fault address in hope to reduce number of minor page faults. >> > >> > This patch creates infrastructure to modify the FAULT_AROUND_ORDER >> > value using mm/Kconfig. This will enable architecture maintainers >> > to decide on suitable FAULT_AROUND_ORDER value based on >> > performance data for that architecture. 
First patch also defaults >> > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >> > out the performance numbers for powerpc (platform pseries) and >> > initialize the fault around order variable for pseries platform of >> > powerpc. > > Sorry for not commenting earlier - just reminded by this ping to Ingo. > > I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. > > arch/powerpc/Kconfig suggests that Power supports base page size of > 4k, 16k, 64k or 256k. > > I would expect your optimal fault_around_order to depend very much on > the base page size. It was 64k, which is what PPC64 uses on all the major distributions. You really only get a choice of 4k and 64k with 64 bit power. > Perhaps fault_around_size would provide a more useful default? That seems to fit. With 4k pages and order 4, you're asking for 64k. Maddy's result shows 64k is also reasonable for 64k pages. Perhaps we try to generalize from two data points (a slight improvement over doing it from 1!), eg: /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ unsigned int fault_around_order __read_mostly = (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); Cheers, Rusty. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Mon, 19 May 2014 08:35:53 +0530 Message-ID: <53797511.1050409@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <87wqdik4n5.fsf@rustcorp.com.au> Sender: owner-linux-mm@kvack.org To: Rusty Russell , Hugh Dickins Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > Hugh Dickins writes: >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >>> >>> Hi Ingo, >>> >>> Do you have any comments for the latest version of the patchset. If >>> not, kindly can you pick it up as is. >>> >>> >>> With regards >>> Maddy >>> >>>> Kirill A. Shutemov with 8c6e50b029 commit introduced >>>> vm_ops->map_pages() for mapping easy accessible pages around >>>> fault address in hope to reduce number of minor page faults. >>>> >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >>>> value using mm/Kconfig. This will enable architecture maintainers >>>> to decide on suitable FAULT_AROUND_ORDER value based on >>>> performance data for that architecture. First patch also defaults >>>> FAULT_AROUND_ORDER Kconfig element to 4. 
Second patch list >>>> out the performance numbers for powerpc (platform pseries) and >>>> initialize the fault around order variable for pseries platform of >>>> powerpc. >> >> Sorry for not commenting earlier - just reminded by this ping to Ingo. >> >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >> >> arch/powerpc/Kconfig suggests that Power supports base page size of >> 4k, 16k, 64k or 256k. >> >> I would expect your optimal fault_around_order to depend very much on >> the base page size. > > It was 64k, which is what PPC64 uses on all the major distributions. > You really only get a choice of 4k and 64k with 64 bit power. > This is true. PPC64 support multiple pagesize and yes the default page size of 64k, is taken as base pagesize for the tests. >> Perhaps fault_around_size would provide a more useful default? > > That seems to fit. With 4k pages and order 4, you're asking for 64k. > Maddy's result shows 64k is also reasonable for 64k pages. > > Perhaps we try to generalize from two data points (a slight improvement > over doing it from 1!), eg: > > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > unsigned int fault_around_order __read_mostly = > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > This may be right. But these are the concerns, will not this make other arch to pick default without any tuning and also this will remove the compile time option to disable the feature? Thanks for review With regards Maddy > Cheers, > Rusty. > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Mon, 19 May 2014 16:23:07 -0700 (PDT) Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Return-path: In-Reply-To: <53797511.1050409@linux.vnet.ibm.com> Sender: owner-linux-mm@kvack.org To: Madhavan Srinivasan Cc: Rusty Russell , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org On Mon, 19 May 2014, Madhavan Srinivasan wrote: > On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > > Hugh Dickins writes: > >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: > >>> > >>> Hi Ingo, > >>> > >>> Do you have any comments for the latest version of the patchset. If > >>> not, kindly can you pick it up as is. > >>> > >>> > >>> With regards > >>> Maddy > >>> > >>>> Kirill A. Shutemov with 8c6e50b029 commit introduced > >>>> vm_ops->map_pages() for mapping easy accessible pages around > >>>> fault address in hope to reduce number of minor page faults. > >>>> > >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER > >>>> value using mm/Kconfig. This will enable architecture maintainers > >>>> to decide on suitable FAULT_AROUND_ORDER value based on > >>>> performance data for that architecture. First patch also defaults > >>>> FAULT_AROUND_ORDER Kconfig element to 4. 
Second patch list > >>>> out the performance numbers for powerpc (platform pseries) and > >>>> initialize the fault around order variable for pseries platform of > >>>> powerpc. > >> > >> Sorry for not commenting earlier - just reminded by this ping to Ingo. > >> > >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. > >> > >> arch/powerpc/Kconfig suggests that Power supports base page size of > >> 4k, 16k, 64k or 256k. > >> > >> I would expect your optimal fault_around_order to depend very much on > >> the base page size. > > > > It was 64k, which is what PPC64 uses on all the major distributions. > > You really only get a choice of 4k and 64k with 64 bit power. > > > This is true. PPC64 support multiple pagesize and yes the default page > size of 64k, is taken as base pagesize for the tests. > > >> Perhaps fault_around_size would provide a more useful default? > > > > That seems to fit. With 4k pages and order 4, you're asking for 64k. > > Maddy's result shows 64k is also reasonable for 64k pages. > > > > Perhaps we try to generalize from two data points (a slight improvement > > over doing it from 1!), eg: > > > > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > > unsigned int fault_around_order __read_mostly = > > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); Rusty's bimodal answer doesn't seem the right starting point to me. Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be the order of the fault-around size in bytes, and fault_around_pages() use 1UL << (fault_around_order - PAGE_SHIFT) - when that doesn't wrap, of course! That would at least have a better chance of being appropriate for architectures with 8k and 16k pages (Itanium springs to mind). Not necessarily right for them, since each architecture may have different faulting overheads; but a better chance of being right than blindly assuming 4k or 64k pages for everyone. 
I'd be glad to see that change go into v3.15: what do you think, Kirill, are we too late to make such a change now? Or do you see some objection to it? > This may be right. But these are the concerns, will not this make other > arch to pick default without any tuning Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? Did other architectures, with other page sizes, back that default? Clearly not powerpc. > and also this will remove the > compile time option to disable the feature? Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig for v3.16? I'm not sure whether Rusty was arguing against that or not. I think we are all three concerned to have a more sensible default than what's there at present. I don't feel very strongly about your Kconfig option: I've no objection, if it were to default to byte order 16. Hugh -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 10:44:06 +0930 Message-ID: <87d2f9jlpd.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Return-path: In-Reply-To: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: linuxppc-dev-bounces+glppd-linuxppc64-dev=m.gmane.org@lists.ozlabs.org Sender: "Linuxppc-dev" To: Hugh Dickins , Madhavan Srinivasan Cc: linux-arch@vger.kernel.org, riel@redhat.com, ak@linux.intel.com, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. 
Shutemov" List-Id: linux-arch.vger.kernel.org SHVnaCBEaWNraW5zIDxodWdoZEBnb29nbGUuY29tPiB3cml0ZXM6Cj4gT24gTW9uLCAxOSBNYXkg MjAxNCwgTWFkaGF2YW4gU3Jpbml2YXNhbiB3cm90ZToKPj4gT24gTW9uZGF5IDE5IE1heSAyMDE0 IDA1OjQyIEFNLCBSdXN0eSBSdXNzZWxsIHdyb3RlOgo+PiA+IEh1Z2ggRGlja2lucyA8aHVnaGRA Z29vZ2xlLmNvbT4gd3JpdGVzOgo+PiA+PiBPbiBUaHUsIDE1IE1heSAyMDE0LCBNYWRoYXZhbiBT cmluaXZhc2FuIHdyb3RlOgo+PiA+Pj4KPj4gPj4+IEhpIEluZ28sCj4+ID4+Pgo+PiA+Pj4gCURv IHlvdSBoYXZlIGFueSBjb21tZW50cyBmb3IgdGhlIGxhdGVzdCB2ZXJzaW9uIG9mIHRoZSBwYXRj aHNldC4gSWYKPj4gPj4+IG5vdCwga2luZGx5IGNhbiB5b3UgcGljayBpdCB1cCBhcyBpcy4KPj4g Pj4+Cj4+ID4+Pgo+PiA+Pj4gV2l0aCByZWdhcmRzCj4+ID4+PiBNYWRkeQo+PiA+Pj4KPj4gPj4+ PiBLaXJpbGwgQS4gU2h1dGVtb3Ygd2l0aCA4YzZlNTBiMDI5IGNvbW1pdCBpbnRyb2R1Y2VkCj4+ ID4+Pj4gdm1fb3BzLT5tYXBfcGFnZXMoKSBmb3IgbWFwcGluZyBlYXN5IGFjY2Vzc2libGUgcGFn ZXMgYXJvdW5kCj4+ID4+Pj4gZmF1bHQgYWRkcmVzcyBpbiBob3BlIHRvIHJlZHVjZSBudW1iZXIg b2YgbWlub3IgcGFnZSBmYXVsdHMuCj4+ID4+Pj4KPj4gPj4+PiBUaGlzIHBhdGNoIGNyZWF0ZXMg aW5mcmFzdHJ1Y3R1cmUgdG8gbW9kaWZ5IHRoZSBGQVVMVF9BUk9VTkRfT1JERVIKPj4gPj4+PiB2 YWx1ZSB1c2luZyBtbS9LY29uZmlnLiBUaGlzIHdpbGwgZW5hYmxlIGFyY2hpdGVjdHVyZSBtYWlu dGFpbmVycwo+PiA+Pj4+IHRvIGRlY2lkZSBvbiBzdWl0YWJsZSBGQVVMVF9BUk9VTkRfT1JERVIg dmFsdWUgYmFzZWQgb24KPj4gPj4+PiBwZXJmb3JtYW5jZSBkYXRhIGZvciB0aGF0IGFyY2hpdGVj dHVyZS4gRmlyc3QgcGF0Y2ggYWxzbyBkZWZhdWx0cwo+PiA+Pj4+IEZBVUxUX0FST1VORF9PUkRF UiBLY29uZmlnIGVsZW1lbnQgdG8gNC4gU2Vjb25kIHBhdGNoIGxpc3QKPj4gPj4+PiBvdXQgdGhl IHBlcmZvcm1hbmNlIG51bWJlcnMgZm9yIHBvd2VycGMgKHBsYXRmb3JtIHBzZXJpZXMpIGFuZAo+ PiA+Pj4+IGluaXRpYWxpemUgdGhlIGZhdWx0IGFyb3VuZCBvcmRlciB2YXJpYWJsZSBmb3IgcHNl cmllcyBwbGF0Zm9ybSBvZgo+PiA+Pj4+IHBvd2VycGMuCj4+ID4+Cj4+ID4+IFNvcnJ5IGZvciBu b3QgY29tbWVudGluZyBlYXJsaWVyIC0ganVzdCByZW1pbmRlZCBieSB0aGlzIHBpbmcgdG8gSW5n by4KPj4gPj4KPj4gPj4gSSBkaWRuJ3Qgc3R1ZHkgeW91ciBudW1iZXJzLCBidXQgbm93aGVyZSBk aWQgSSBzZWUgd2hhdCBQQUdFX1NJWkUgeW91IHVzZS4KPj4gPj4KPj4gPj4gYXJjaC9wb3dlcnBj L0tjb25maWcgc3VnZ2VzdHMgdGhhdCBQb3dlciBzdXBwb3J0cyBiYXNlIHBhZ2Ugc2l6ZSBvZgo+ 
PiA+PiA0aywgMTZrLCA2NGsgb3IgMjU2ay4KPj4gPj4KPj4gPj4gSSB3b3VsZCBleHBlY3QgeW91 ciBvcHRpbWFsIGZhdWx0X2Fyb3VuZF9vcmRlciB0byBkZXBlbmQgdmVyeSBtdWNoIG9uCj4+ID4+ IHRoZSBiYXNlIHBhZ2Ugc2l6ZS4KPj4gPiAKPj4gPiBJdCB3YXMgNjRrLCB3aGljaCBpcyB3aGF0 IFBQQzY0IHVzZXMgb24gYWxsIHRoZSBtYWpvciBkaXN0cmlidXRpb25zLgo+PiA+IFlvdSByZWFs bHkgb25seSBnZXQgYSBjaG9pY2Ugb2YgNGsgYW5kIDY0ayB3aXRoIDY0IGJpdCBwb3dlci4KPj4g PiAKPj4gVGhpcyBpcyB0cnVlLiBQUEM2NCBzdXBwb3J0IG11bHRpcGxlIHBhZ2VzaXplIGFuZCB5 ZXMgdGhlIGRlZmF1bHQgcGFnZQo+PiBzaXplIG9mIDY0aywgaXMgdGFrZW4gYXMgYmFzZSBwYWdl c2l6ZSBmb3IgdGhlIHRlc3RzLgo+PiAKPj4gPj4gUGVyaGFwcyBmYXVsdF9hcm91bmRfc2l6ZSB3 b3VsZCBwcm92aWRlIGEgbW9yZSB1c2VmdWwgZGVmYXVsdD8KPj4gPiAKPj4gPiBUaGF0IHNlZW1z IHRvIGZpdC4gIFdpdGggNGsgcGFnZXMgYW5kIG9yZGVyIDQsIHlvdSdyZSBhc2tpbmcgZm9yIDY0 ay4KPj4gPiBNYWRkeSdzIHJlc3VsdCBzaG93cyA2NGsgaXMgYWxzbyByZWFzb25hYmxlIGZvciA2 NGsgcGFnZXMuCj4+ID4gCj4+ID4gUGVyaGFwcyB3ZSB0cnkgdG8gZ2VuZXJhbGl6ZSBmcm9tIHR3 byBkYXRhIHBvaW50cyAoYSBzbGlnaHQgaW1wcm92ZW1lbnQKPj4gPiBvdmVyIGRvaW5nIGl0IGZy b20gMSEpLCBlZzoKPj4gPiAKPj4gPiAvKiA0IHNlZW1zIGdvb2QgZm9yIDRrLXBhZ2UgeDg2LCAw IHNlZW1zIGdvb2QgZm9yIDY0ayBwYWdlIHBwYzY0LCBzbzogKi8KPj4gPiB1bnNpZ25lZCBpbnQg ZmF1bHRfYXJvdW5kX29yZGVyIF9fcmVhZF9tb3N0bHkgPQo+PiA+ICAgICAgICAgKDE2IC0gUEFH RV9TSElGVCA8IDAgPyAwIDogMTYgLSBQQUdFX1NISUZUKTsKPgo+IFJ1c3R5J3MgYmltb2RhbCBh bnN3ZXIgZG9lc24ndCBzZWVtIHRoZSByaWdodCBzdGFydGluZyBwb2ludCB0byBtZS4KCj8gIEl0 J3Mgbm90IGJpbW9kYWwsIGl0J3MgZ3JhZGVkLiAgSSB0aGluayB5b3UgbWlzcmVhZD8KCj4gU2hv dWxkbid0IEZBVUxUX0FST1VORF9PUkRFUiBhbmQgZmF1bHRfYXJvdW5kX29yZGVyIGJlIGNoYW5n ZWQgdG8gYmUKPiB0aGUgb3JkZXIgb2YgdGhlIGZhdWx0LWFyb3VuZCBzaXplIGluIGJ5dGVzLCBh bmQgZmF1bHRfYXJvdW5kX3BhZ2VzKCkKPiB1c2UgMVVMIDw8IChmYXVsdF9hcm91bmRfb3JkZXIg LSBQQUdFX1NISUZUKQo+IC0gd2hlbiB0aGF0IGRvZXNuJ3Qgd3JhcCwgb2YgY291cnNlIQo+Cj4g VGhhdCB3b3VsZCBhdCBsZWFzdCBoYXZlIGEgYmV0dGVyIGNoYW5jZSBvZiBiZWluZyBhcHByb3By aWF0ZSBmb3IKPiBhcmNoaXRlY3R1cmVzIHdpdGggOGsgYW5kIDE2ayBwYWdlcyAoSXRhbml1bSBz 
cHJpbmdzIHRvIG1pbmQpLgoKV2VsbCwgZnJvbSBvdXIgdHdvIGRhdGEgcG9pbnRzIGl0IHNlZW1z IHRoYXQgd2Ugd2FudCB0byBmYXVsdCBpbgo2NGsgYXQgYSB0aW1lIHdoYXRldmVyIG91ciBwYWdl IHNpemUuICBQZXJoYXBzIGl0J3MgY2xlYXJlciBpZiB0aGUKY29kZSBleHByZXNzZXMgaXRzZWxm IHRoYXQgd2F5LgoKPiBXYXNuJ3QgRkFVTFRfQVJPVU5EX09SREVSIDQgY2hvc2VuIHNvbGVseSBv biB0aGUgYmFzaXMgb2YgeDg2IDRrIHBhZ2VzPwo+IERpZCBvdGhlciBhcmNoaXRlY3R1cmVzLCB3 aXRoIG90aGVyIHBhZ2Ugc2l6ZXMsIGJhY2sgdGhhdCBkZWZhdWx0Pwo+IENsZWFybHkgbm90IHBv d2VycGMuCgpZZWFoLCBCZW5IIGZsYWdnZWQgaXQgYXMgIndlIHNob3VsZCB0ZXN0IHRoaXMiIGZv ciBwb3dlcnBjLCB3aGljaCBpcwp3aGF0IE1hZGR5IHRoZW4gZGlkLgoKPj4gYW5kIGFsc28gdGhp cyB3aWxsIHJlbW92ZSB0aGUKPj4gY29tcGlsZSB0aW1lIG9wdGlvbiB0byBkaXNhYmxlIHRoZSBm ZWF0dXJlPwo+Cj4gQ29tcGlsZSB0aW1lIG9wdGlvbiBtZWFuaW5nIHlvdXIgRkFVTFRfQVJPVU5E X09SREVSIGluIG1tL0tjb25maWcKPiBmb3IgdjMuMTY/Cj4KPiBJJ20gbm90IHN1cmUgd2hldGhl ciBSdXN0eSB3YXMgYXJndWluZyBhZ2FpbnN0IHRoYXQgb3Igbm90LiAgSSB0aGluawo+IHdlIGFy ZSBhbGwgdGhyZWUgY29uY2VybmVkIHRvIGhhdmUgYSBtb3JlIHNlbnNpYmxlIGRlZmF1bHQgdGhh biB3aGF0J3MKPiB0aGVyZSBhdCBwcmVzZW50LiAgSSBkb24ndCBmZWVsIHZlcnkgc3Ryb25nbHkg YWJvdXQgeW91ciBLY29uZmlnCj4gb3B0aW9uOiBJJ3ZlIG5vIG9iamVjdGlvbiwgaWYgaXQgd2Vy ZSB0byBkZWZhdWx0IHRvIGJ5dGUgb3JkZXIgMTYuCgpJIGRvbid0IG1pbmQgZWl0aGVyLgoKQ2hl ZXJzLApSdXN0eS4KX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f X18KTGludXhwcGMtZGV2IG1haWxpbmcgbGlzdApMaW51eHBwYy1kZXZAbGlzdHMub3psYWJzLm9y ZwpodHRwczovL2xpc3RzLm96bGFicy5vcmcvbGlzdGluZm8vbGludXhwcGMtZGV2 From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 00:32:01 -0700 Message-ID: <20140520003201.a2360d5d.akpm@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> 
<87oaythsvk.fsf@rustcorp.com.au> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <87oaythsvk.fsf@rustcorp.com.au> Sender: owner-linux-mm@kvack.org To: Rusty Russell Cc: "Kirill A. Shutemov" , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org On Tue, 20 May 2014 15:52:07 +0930 Rusty Russell wrote: > "Kirill A. Shutemov" writes: > > Andrew Morton wrote: > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >> > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >> > the order of the fault-around size in bytes, and fault_around_pages() > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > >> > >> Yes. And shame on me for missing it (this time!) at review. > >> > >> There's still time to fix this. Patches, please. > > > > Here it is. Made at 3.30 AM, build tested only. > > Prefer on top of Maddy's patch which makes it always a variable, rather > than CONFIG_DEBUG_FS. It's got enough hair as it is. > We're at 3.15-rc5 and this interface should be finalised for 3.16. So Kirill's patch is pretty urgent and should come first. Well. It's only a debugfs interface at this stage so we are allowed to change it later, but it's better not to. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 07:36:42 +0530 Message-ID: <537AB8B2.3040000@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Return-path: In-Reply-To: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: linuxppc-dev-bounces+glppd-linuxppc64-dev=m.gmane.org@lists.ozlabs.org Sender: "Linuxppc-dev" To: Hugh Dickins Cc: linux-arch@vger.kernel.org, riel@redhat.com, x86@kernel.org, dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. 
Shutemov" List-Id: linux-arch.vger.kernel.org T24gVHVlc2RheSAyMCBNYXkgMjAxNCAwNDo1MyBBTSwgSHVnaCBEaWNraW5zIHdyb3RlOgo+IE9u IE1vbiwgMTkgTWF5IDIwMTQsIE1hZGhhdmFuIFNyaW5pdmFzYW4gd3JvdGU6Cj4+IE9uIE1vbmRh eSAxOSBNYXkgMjAxNCAwNTo0MiBBTSwgUnVzdHkgUnVzc2VsbCB3cm90ZToKPj4+IEh1Z2ggRGlj a2lucyA8aHVnaGRAZ29vZ2xlLmNvbT4gd3JpdGVzOgo+Pj4+IE9uIFRodSwgMTUgTWF5IDIwMTQs IE1hZGhhdmFuIFNyaW5pdmFzYW4gd3JvdGU6Cj4+Pj4+Cj4+Pj4+IEhpIEluZ28sCj4+Pj4+Cj4+ Pj4+IAlEbyB5b3UgaGF2ZSBhbnkgY29tbWVudHMgZm9yIHRoZSBsYXRlc3QgdmVyc2lvbiBvZiB0 aGUgcGF0Y2hzZXQuIElmCj4+Pj4+IG5vdCwga2luZGx5IGNhbiB5b3UgcGljayBpdCB1cCBhcyBp cy4KPj4+Pj4KPj4+Pj4KPj4+Pj4gV2l0aCByZWdhcmRzCj4+Pj4+IE1hZGR5Cj4+Pj4+Cj4+Pj4+ PiBLaXJpbGwgQS4gU2h1dGVtb3Ygd2l0aCA4YzZlNTBiMDI5IGNvbW1pdCBpbnRyb2R1Y2VkCj4+ Pj4+PiB2bV9vcHMtPm1hcF9wYWdlcygpIGZvciBtYXBwaW5nIGVhc3kgYWNjZXNzaWJsZSBwYWdl cyBhcm91bmQKPj4+Pj4+IGZhdWx0IGFkZHJlc3MgaW4gaG9wZSB0byByZWR1Y2UgbnVtYmVyIG9m IG1pbm9yIHBhZ2UgZmF1bHRzLgo+Pj4+Pj4KPj4+Pj4+IFRoaXMgcGF0Y2ggY3JlYXRlcyBpbmZy YXN0cnVjdHVyZSB0byBtb2RpZnkgdGhlIEZBVUxUX0FST1VORF9PUkRFUgo+Pj4+Pj4gdmFsdWUg dXNpbmcgbW0vS2NvbmZpZy4gVGhpcyB3aWxsIGVuYWJsZSBhcmNoaXRlY3R1cmUgbWFpbnRhaW5l cnMKPj4+Pj4+IHRvIGRlY2lkZSBvbiBzdWl0YWJsZSBGQVVMVF9BUk9VTkRfT1JERVIgdmFsdWUg YmFzZWQgb24KPj4+Pj4+IHBlcmZvcm1hbmNlIGRhdGEgZm9yIHRoYXQgYXJjaGl0ZWN0dXJlLiBG aXJzdCBwYXRjaCBhbHNvIGRlZmF1bHRzCj4+Pj4+PiBGQVVMVF9BUk9VTkRfT1JERVIgS2NvbmZp ZyBlbGVtZW50IHRvIDQuIFNlY29uZCBwYXRjaCBsaXN0Cj4+Pj4+PiBvdXQgdGhlIHBlcmZvcm1h bmNlIG51bWJlcnMgZm9yIHBvd2VycGMgKHBsYXRmb3JtIHBzZXJpZXMpIGFuZAo+Pj4+Pj4gaW5p dGlhbGl6ZSB0aGUgZmF1bHQgYXJvdW5kIG9yZGVyIHZhcmlhYmxlIGZvciBwc2VyaWVzIHBsYXRm b3JtIG9mCj4+Pj4+PiBwb3dlcnBjLgo+Pj4+Cj4+Pj4gU29ycnkgZm9yIG5vdCBjb21tZW50aW5n IGVhcmxpZXIgLSBqdXN0IHJlbWluZGVkIGJ5IHRoaXMgcGluZyB0byBJbmdvLgo+Pj4+Cj4+Pj4g SSBkaWRuJ3Qgc3R1ZHkgeW91ciBudW1iZXJzLCBidXQgbm93aGVyZSBkaWQgSSBzZWUgd2hhdCBQ QUdFX1NJWkUgeW91IHVzZS4KPj4+Pgo+Pj4+IGFyY2gvcG93ZXJwYy9LY29uZmlnIHN1Z2dlc3Rz IHRoYXQgUG93ZXIgc3VwcG9ydHMgYmFzZSBwYWdlIHNpemUgb2YKPj4+PiA0aywgMTZrLCA2NGsg 
b3IgMjU2ay4KPj4+Pgo+Pj4+IEkgd291bGQgZXhwZWN0IHlvdXIgb3B0aW1hbCBmYXVsdF9hcm91 bmRfb3JkZXIgdG8gZGVwZW5kIHZlcnkgbXVjaCBvbgo+Pj4+IHRoZSBiYXNlIHBhZ2Ugc2l6ZS4K Pj4+Cj4+PiBJdCB3YXMgNjRrLCB3aGljaCBpcyB3aGF0IFBQQzY0IHVzZXMgb24gYWxsIHRoZSBt YWpvciBkaXN0cmlidXRpb25zLgo+Pj4gWW91IHJlYWxseSBvbmx5IGdldCBhIGNob2ljZSBvZiA0 ayBhbmQgNjRrIHdpdGggNjQgYml0IHBvd2VyLgo+Pj4KPj4gVGhpcyBpcyB0cnVlLiBQUEM2NCBz dXBwb3J0IG11bHRpcGxlIHBhZ2VzaXplIGFuZCB5ZXMgdGhlIGRlZmF1bHQgcGFnZQo+PiBzaXpl IG9mIDY0aywgaXMgdGFrZW4gYXMgYmFzZSBwYWdlc2l6ZSBmb3IgdGhlIHRlc3RzLgo+Pgo+Pj4+ IFBlcmhhcHMgZmF1bHRfYXJvdW5kX3NpemUgd291bGQgcHJvdmlkZSBhIG1vcmUgdXNlZnVsIGRl ZmF1bHQ/Cj4+Pgo+Pj4gVGhhdCBzZWVtcyB0byBmaXQuICBXaXRoIDRrIHBhZ2VzIGFuZCBvcmRl ciA0LCB5b3UncmUgYXNraW5nIGZvciA2NGsuCj4+PiBNYWRkeSdzIHJlc3VsdCBzaG93cyA2NGsg aXMgYWxzbyByZWFzb25hYmxlIGZvciA2NGsgcGFnZXMuCj4+Pgo+Pj4gUGVyaGFwcyB3ZSB0cnkg dG8gZ2VuZXJhbGl6ZSBmcm9tIHR3byBkYXRhIHBvaW50cyAoYSBzbGlnaHQgaW1wcm92ZW1lbnQK Pj4+IG92ZXIgZG9pbmcgaXQgZnJvbSAxISksIGVnOgo+Pj4KPj4+IC8qIDQgc2VlbXMgZ29vZCBm b3IgNGstcGFnZSB4ODYsIDAgc2VlbXMgZ29vZCBmb3IgNjRrIHBhZ2UgcHBjNjQsIHNvOiAqLwo+ Pj4gdW5zaWduZWQgaW50IGZhdWx0X2Fyb3VuZF9vcmRlciBfX3JlYWRfbW9zdGx5ID0KPj4+ICAg ICAgICAgKDE2IC0gUEFHRV9TSElGVCA8IDAgPyAwIDogMTYgLSBQQUdFX1NISUZUKTsKPiAKPiBS dXN0eSdzIGJpbW9kYWwgYW5zd2VyIGRvZXNuJ3Qgc2VlbSB0aGUgcmlnaHQgc3RhcnRpbmcgcG9p bnQgdG8gbWUuCj4gCj4gU2hvdWxkbid0IEZBVUxUX0FST1VORF9PUkRFUiBhbmQgZmF1bHRfYXJv dW5kX29yZGVyIGJlIGNoYW5nZWQgdG8gYmUKPiB0aGUgb3JkZXIgb2YgdGhlIGZhdWx0LWFyb3Vu ZCBzaXplIGluIGJ5dGVzLCBhbmQgZmF1bHRfYXJvdW5kX3BhZ2VzKCkKPiB1c2UgMVVMIDw8IChm YXVsdF9hcm91bmRfb3JkZXIgLSBQQUdFX1NISUZUKQo+IC0gd2hlbiB0aGF0IGRvZXNuJ3Qgd3Jh cCwgb2YgY291cnNlIQo+IAo+IFRoYXQgd291bGQgYXQgbGVhc3QgaGF2ZSBhIGJldHRlciBjaGFu Y2Ugb2YgYmVpbmcgYXBwcm9wcmlhdGUgZm9yCj4gYXJjaGl0ZWN0dXJlcyB3aXRoIDhrIGFuZCAx NmsgcGFnZXMgKEl0YW5pdW0gc3ByaW5ncyB0byBtaW5kKS4KPiAKPiBOb3QgbmVjZXNzYXJpbHkg cmlnaHQgZm9yIHRoZW0sIHNpbmNlIGVhY2ggYXJjaGl0ZWN0dXJlIG1heSBoYXZlCj4gZGlmZmVy 
ZW50IGZhdWx0aW5nIG92ZXJoZWFkczsgYnV0IGEgYmV0dGVyIGNoYW5jZSBvZiBiZWluZyByaWdo dAo+IHRoYW4gYmxpbmRseSBhc3N1bWluZyA0ayBvciA2NGsgcGFnZXMgZm9yIGV2ZXJ5b25lLgo+ IAo+IEknZCBiZSBnbGFkIHRvIHNlZSB0aGF0IGNoYW5nZSBnbyBpbnRvIHYzLjE1OiB3aGF0IGRv IHlvdSB0aGluaywKPiBLaXJpbGwsIGFyZSB3ZSB0b28gbGF0ZSB0byBtYWtlIHN1Y2ggYSBjaGFu Z2Ugbm93Pwo+IE9yIGRvIHlvdSBzZWUgc29tZSBvYmplY3Rpb24gdG8gaXQ/Cj4gCj4+IFRoaXMg bWF5IGJlIHJpZ2h0LiBCdXQgdGhlc2UgYXJlIHRoZSBjb25jZXJucywgd2lsbCBub3QgdGhpcyBt YWtlIG90aGVyCj4+IGFyY2ggdG8gcGljayBkZWZhdWx0IHdpdGhvdXQgYW55IHR1bmluZwo+IAo+ IFdhc24ndCBGQVVMVF9BUk9VTkRfT1JERVIgNCBjaG9zZW4gc29sZWx5IG9uIHRoZSBiYXNpcyBv ZiB4ODYgNGsgcGFnZXM/Cj4gRGlkIG90aGVyIGFyY2hpdGVjdHVyZXMsIHdpdGggb3RoZXIgcGFn ZSBzaXplcywgYmFjayB0aGF0IGRlZmF1bHQ/Cj4gQ2xlYXJseSBub3QgcG93ZXJwYy4KCk9rLgoK PiAKPj4gYW5kIGFsc28gdGhpcyB3aWxsIHJlbW92ZSB0aGUKPj4gY29tcGlsZSB0aW1lIG9wdGlv biB0byBkaXNhYmxlIHRoZSBmZWF0dXJlPwo+IAo+IENvbXBpbGUgdGltZSBvcHRpb24gbWVhbmlu ZyB5b3VyIEZBVUxUX0FST1VORF9PUkRFUiBpbiBtbS9LY29uZmlnCj4gZm9yIHYzLjE2Pwo+IAo+ IEknbSBub3Qgc3VyZSB3aGV0aGVyIFJ1c3R5IHdhcyBhcmd1aW5nIGFnYWluc3QgdGhhdCBvciBu b3QgSSB0aGluawoKPiB3ZSBhcmUgYWxsIHRocmVlIGNvbmNlcm5lZCB0byBoYXZlIGEgbW9yZSBz ZW5zaWJsZSBkZWZhdWx0IHRoYW4gd2hhdCdzCj4gdGhlcmUgYXQgcHJlc2VudC4gIEkgZG9uJ3Qg ZmVlbCB2ZXJ5IHN0cm9uZ2x5IGFib3V0IHlvdXIgS2NvbmZpZwoKQWRkZWQgaXQgYXMgb25lIHdh eSB0byByZXNldCBvciBkaXNhYmxlIHRoZSBkZWZhdWx0IHZhbHVlLiBCdXQgdGhlbiBJCmd1ZXNz IHdlIGRlY2lkZWQgb24gaGF2aW5nIEZBVUxUX0FST1VORF9PUkRFUiBhcyBhIHZhcmlhYmxlIHdo aWNoIGlzCm1vcmUgaW1wb3J0YW50IHRoYW4gS2NvbmZpZyBvcHRpb24uCgo+IG9wdGlvbjogSSd2 ZSBubyBvYmplY3Rpb24sIGlmIGl0IHdlcmUgdG8gZGVmYXVsdCB0byBieXRlIG9yZGVyIDE2Lgo+ IAoKVGhhbmtzIGZvciByZXZpZXcKV2l0aCByZWdhcmRzCk1hZGR5Cgo+IEh1Z2gKPiAKCl9fX19f X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fCkxpbnV4cHBjLWRldiBt YWlsaW5nIGxpc3QKTGludXhwcGMtZGV2QGxpc3RzLm96bGFicy5vcmcKaHR0cHM6Ly9saXN0cy5v emxhYnMub3JnL2xpc3RpbmZvL2xpbnV4cHBjLWRldg== From mboxrd@z Thu Jan 1 00:00:00 1970 From: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: 
FAULT_AROUND_ORDER patchset performance data for powerpc Date: Mon, 19 May 2014 19:34:27 -0700 (PDT) Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <87d2f9jlpd.fsf@rustcorp.com.au> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Return-path: In-Reply-To: <87d2f9jlpd.fsf@rustcorp.com.au> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: linuxppc-dev-bounces+glppe-linuxppc-embedded-2=m.gmane.org@lists.ozlabs.org Sender: "Linuxppc-dev" To: Rusty Russell Cc: linux-arch@vger.kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. 
Shutemov" List-Id: linux-arch.vger.kernel.org T24gVHVlLCAyMCBNYXkgMjAxNCwgUnVzdHkgUnVzc2VsbCB3cm90ZToKPiBIdWdoIERpY2tpbnMg PGh1Z2hkQGdvb2dsZS5jb20+IHdyaXRlczoKPiA+PiBPbiBNb25kYXkgMTkgTWF5IDIwMTQgMDU6 NDIgQU0sIFJ1c3R5IFJ1c3NlbGwgd3JvdGU6Cj4gPj4gPiAKPiA+PiA+IFBlcmhhcHMgd2UgdHJ5 IHRvIGdlbmVyYWxpemUgZnJvbSB0d28gZGF0YSBwb2ludHMgKGEgc2xpZ2h0IGltcHJvdmVtZW50 Cj4gPj4gPiBvdmVyIGRvaW5nIGl0IGZyb20gMSEpLCBlZzoKPiA+PiA+IAo+ID4+ID4gLyogNCBz ZWVtcyBnb29kIGZvciA0ay1wYWdlIHg4NiwgMCBzZWVtcyBnb29kIGZvciA2NGsgcGFnZSBwcGM2 NCwgc286ICovCj4gPj4gPiB1bnNpZ25lZCBpbnQgZmF1bHRfYXJvdW5kX29yZGVyIF9fcmVhZF9t b3N0bHkgPQo+ID4+ID4gICAgICAgICAoMTYgLSBQQUdFX1NISUZUIDwgMCA/IDAgOiAxNiAtIFBB R0VfU0hJRlQpOwo+ID4KPiA+IFJ1c3R5J3MgYmltb2RhbCBhbnN3ZXIgZG9lc24ndCBzZWVtIHRo ZSByaWdodCBzdGFydGluZyBwb2ludCB0byBtZS4KPiAKPiA/ICBJdCdzIG5vdCBiaW1vZGFsLCBp dCdzIGdyYWRlZC4gIEkgdGhpbmsgeW91IG1pc3JlYWQ/CgpZaWtlcywgd29yc2UgdGhhbiBtaXNy ZWFkLCBtb3JlIGxpa2UgSSB3YXMgdG9vIHJ1ZGUgZXZlbiB0byByZWFkOiBzb3JyeSEKCkh1Z2gK X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KTGludXhwcGMt ZGV2IG1haWxpbmcgbGlzdApMaW51eHBwYy1kZXZAbGlzdHMub3psYWJzLm9yZwpodHRwczovL2xp c3RzLm96bGFicy5vcmcvbGlzdGluZm8vbGludXhwcGMtZGV2 From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Mon, 19 May 2014 16:43:01 -0700 Message-ID: <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Return-path: In-Reply-To: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: linuxppc-dev-bounces+glppe-linuxppc-embedded-2=m.gmane.org@lists.ozlabs.org Sender: "Linuxppc-dev" To: Hugh Dickins Cc: linux-arch@vger.kernel.org, x86@kernel.org, riel@redhat.com, 
Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: linux-arch.vger.kernel.org T24gTW9uLCAxOSBNYXkgMjAxNCAxNjoyMzowNyAtMDcwMCAoUERUKSBIdWdoIERpY2tpbnMgPGh1 Z2hkQGdvb2dsZS5jb20+IHdyb3RlOgoKPiBTaG91bGRuJ3QgRkFVTFRfQVJPVU5EX09SREVSIGFu ZCBmYXVsdF9hcm91bmRfb3JkZXIgYmUgY2hhbmdlZCB0byBiZQo+IHRoZSBvcmRlciBvZiB0aGUg ZmF1bHQtYXJvdW5kIHNpemUgaW4gYnl0ZXMsIGFuZCBmYXVsdF9hcm91bmRfcGFnZXMoKQo+IHVz ZSAxVUwgPDwgKGZhdWx0X2Fyb3VuZF9vcmRlciAtIFBBR0VfU0hJRlQpCgpZZXMuICBBbmQgc2hh bWUgb24gbWUgZm9yIG1pc3NpbmcgaXQgKHRoaXMgdGltZSEpIGF0IHJldmlldy4KClRoZXJlJ3Mg c3RpbGwgdGltZSB0byBmaXggdGhpcy4gIFBhdGNoZXMsIHBsZWFzZS4KX19fX19fX19fX19fX19f X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KTGludXhwcGMtZGV2IG1haWxpbmcgbGlz dApMaW51eHBwYy1kZXZAbGlzdHMub3psYWJzLm9yZwpodHRwczovL2xpc3RzLm96bGFicy5vcmcv bGlzdGluZm8vbGludXhwcGMtZGV2 From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Kirill A. Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 03:44:29 +0300 (EEST) Message-ID: <20140520004429.E660AE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> Sender: owner-linux-mm@kvack.org To: Andrew Morton Cc: Hugh Dickins , Madhavan Srinivasan , Rusty Russell , "Kirill A. 
Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org Andrew Morton wrote: > On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > the order of the fault-around size in bytes, and fault_around_pages() > > use 1UL << (fault_around_order - PAGE_SHIFT) > > Yes. And shame on me for missing it (this time!) at review. > > There's still time to fix this. Patches, please. Here it is. Made at 3.30 AM, build tested only. I'll sign it off tomorrow after testing. diff --git a/mm/memory.c b/mm/memory.c index 037b812a9531..9d6941c9a9e4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,62 +3402,62 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +#define FAULT_AROUND_BYTES 65536 #ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; +static unsigned int fault_around_bytes = FAULT_AROUND_BYTES; -static int fault_around_order_get(void *data, u64 *val) +static int fault_around_bytes_get(void *data, u64 *val) { - *val = fault_around_order; + *val = fault_around_bytes; return 0; } -static int fault_around_order_set(void *data, u64 val) +static int fault_around_bytes_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); - if (1UL << val > PTRS_PER_PTE) + BUILD_BUG_ON(FAULT_AROUND_BYTES / PAGE_SIZE > PTRS_PER_PTE); + if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; - fault_around_order = val; + fault_around_bytes = val; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, - fault_around_order_get, fault_around_order_set, "%llu\n"); 
+DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, - &fault_around_order_fops); + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, + &fault_around_bytes_fops); if (!ret) - pr_warn("Failed to create fault_around_order in debugfs"); + pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } late_initcall(fault_around_debugfs); static inline unsigned long fault_around_pages(void) { - return 1UL << fault_around_order; + return fault_around_bytes / PAGE_SIZE; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); + return ~(round_down(fault_around_bytes, PAGE_SIZE) - 1); } #else static inline unsigned long fault_around_pages(void) { unsigned long nr_pages; - nr_pages = 1UL << FAULT_AROUND_ORDER; + nr_pages = FAULT_AROUND_BYTES / PAGE_SIZE; BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); return nr_pages; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); + return ~(round_down(FAULT_AROUND_BYTES, PAGE_SIZE) - 1); } #endif @@ -3515,7 +3515,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- Kirill A. Shutemov -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Morton Subject: Re: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries Date: Tue, 20 May 2014 00:28:34 -0700 Message-ID: <20140520002834.aefb5a90.akpm@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> Sender: owner-linux-mm@kvack.org To: Madhavan Srinivasan Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org On Thu, 8 May 2014 14:58:16 +0530 Madhavan Srinivasan wrote: > --- a/arch/powerpc/platforms/pseries/pseries.h > +++ b/arch/powerpc/platforms/pseries/pseries.h > @@ -17,6 +17,8 @@ struct device_node; > extern void request_event_sources_irqs(struct device_node *np, > irq_handler_t handler, const char *name); > > +extern unsigned int fault_around_order; This isn't an appropriate header file for exporting something from core mm - what happens if arch/mn10300 wants it? I guess include/linux/mm.h is the place. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 13:23:03 +0530 Message-ID: <537B09DF.1090906@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520003201.a2360d5d.akpm@linux-foundation.org> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Return-path: Received: from e28smtp03.in.ibm.com ([122.248.162.3]:48600 "EHLO e28smtp03.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750850AbaETHxP (ORCPT ); Tue, 20 May 2014 03:53:15 -0400 Received: from /spool/local by e28smtp03.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 20 May 2014 13:23:12 +0530 In-Reply-To: <20140520003201.a2360d5d.akpm@linux-foundation.org> Sender: linux-arch-owner@vger.kernel.org List-ID: To: Andrew Morton , Rusty Russell Cc: "Kirill A. Shutemov" , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tuesday 20 May 2014 01:02 PM, Andrew Morton wrote: > On Tue, 20 May 2014 15:52:07 +0930 Rusty Russell wrote: > >> "Kirill A. 
Shutemov" writes: >>> Andrew Morton wrote: >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>> >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>> >>>> Yes. And shame on me for missing it (this time!) at review. >>>> >>>> There's still time to fix this. Patches, please. >>> >>> Here it is. Made at 3.30 AM, build tested only. >> >> Prefer on top of Maddy's patch which makes it always a variable, rather >> than CONFIG_DEBUG_FS. It's got enough hair as it is. >> > > We're at 3.15-rc5 and this interface should be finalised for 3.16. So > Kirrill's patch is pretty urgent and should come first. > > Well. It's only a debugfs interface at this stage so we are allowed to > change it later, but it's better not to. > My patchset does not change the interface, but uses the current fault around order variable from CONFIG_DEBUG_FS block to allow changes at runtime, instead of having a constant and some cleanup. 
Thanks for review Regards --Maddy From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: Re: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries Date: Tue, 20 May 2014 13:33:58 +0530 Message-ID: <537B0C6E.4030501@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> <20140520002834.aefb5a90.akpm@linux-foundation.org> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Return-path: In-Reply-To: <20140520002834.aefb5a90.akpm@linux-foundation.org> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: linuxppc-dev-bounces+glppd-linuxppc64-dev=m.gmane.org@lists.ozlabs.org Sender: "Linuxppc-dev" To: Andrew Morton Cc: linux-arch@vger.kernel.org, riel@redhat.com, rusty@rustcorp.com.au, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: linux-arch.vger.kernel.org T24gVHVlc2RheSAyMCBNYXkgMjAxNCAxMjo1OCBQTSwgQW5kcmV3IE1vcnRvbiB3cm90ZToKPiBP biBUaHUsICA4IE1heSAyMDE0IDE0OjU4OjE2ICswNTMwIE1hZGhhdmFuIFNyaW5pdmFzYW4gPG1h ZGR5QGxpbnV4LnZuZXQuaWJtLmNvbT4gd3JvdGU6Cj4gCj4+IC0tLSBhL2FyY2gvcG93ZXJwYy9w bGF0Zm9ybXMvcHNlcmllcy9wc2VyaWVzLmgKPj4gKysrIGIvYXJjaC9wb3dlcnBjL3BsYXRmb3Jt cy9wc2VyaWVzL3BzZXJpZXMuaAo+PiBAQCAtMTcsNiArMTcsOCBAQCBzdHJ1Y3QgZGV2aWNlX25v ZGU7Cj4+ICBleHRlcm4gdm9pZCByZXF1ZXN0X2V2ZW50X3NvdXJjZXNfaXJxcyhzdHJ1Y3QgZGV2 aWNlX25vZGUgKm5wLAo+PiAgCQkJCSAgICAgICBpcnFfaGFuZGxlcl90IGhhbmRsZXIsIGNvbnN0 IGNoYXIgKm5hbWUpOwo+PiAgCj4+ICtleHRlcm4gdW5zaWduZWQgaW50IGZhdWx0X2Fyb3VuZF9v cmRlcjsKPiAKPiBUaGlzIGlzbid0IGFuIGFwcHJvcHJpYXRlIGhlYWRlciBmaWxlIGZvciBleHBv cnRpbmcgc29tZXRoaW5nIGZyb20gY29yZQo+IG1tIC0gd2hhdCBoYXBwZW5zIGlmIGFyY2gvbW4x 
MDMwMCB3YW50cyBpdD8uCj4KPiBJIGd1ZXNzIGluY2x1ZGUvbGludXgvbW0uaCBpcyB0aGUgcGxh Y2UuCj4gCgpSdXN0eSBhbHJlYWR5IHN1Z2dlc3RlZCB0aGlzLiBNeSBiYWQuICBSZWFzb24gZm9y IGFkZGluZyBpdCBoZXJlIHdhcwp0aGF0LCBJIGRpZCB0aGUgcGVyZm9ybWFuY2UgdGVzdCBmb3Ig dGhpcyBwbGF0Zm9ybS4gV2lsbCBjaGFuZ2UgYW5kIHNlbmQKaXQgb3V0LgoKVGhhbmtzIGZvciBy ZXZpZXcKUmVnYXJkcwpNYWRkeQoKX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f X19fX19fX19fX18KTGludXhwcGMtZGV2IG1haWxpbmcgbGlzdApMaW51eHBwYy1kZXZAbGlzdHMu b3psYWJzLm9yZwpodHRwczovL2xpc3RzLm96bGFicy5vcmcvbGlzdGluZm8vbGludXhwcGMtZGV2 From mboxrd@z Thu Jan 1 00:00:00 1970 From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 15:52:07 +0930 Message-ID: <87oaythsvk.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain Return-path: In-Reply-To: <20140520004429.E660AE009B@blue.fi.intel.com> Sender: linux-kernel-owner@vger.kernel.org To: Andrew Morton Cc: Hugh Dickins , Madhavan Srinivasan , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org "Kirill A. Shutemov" writes: > Andrew Morton wrote: >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >> >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >> > the order of the fault-around size in bytes, and fault_around_pages() >> > use 1UL << (fault_around_order - PAGE_SHIFT) >> >> Yes. And shame on me for missing it (this time!) 
at review. >> >> There's still time to fix this. Patches, please. > > Here it is. Made at 3.30 AM, build tested only. Prefer on top of Maddy's patch which makes it always a variable, rather than CONFIG_DEBUG_FS. It's got enough hair as it is. Cheers, Rusty. From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Kirill A. Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 13:27:38 +0300 (EEST) Message-ID: <20140520102738.7F096E009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> Content-Transfer-Encoding: 7bit Return-path: Received: from mga11.intel.com ([192.55.52.93]:2636 "EHLO mga11.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751678AbaETK1p (ORCPT ); Tue, 20 May 2014 06:27:45 -0400 In-Reply-To: <87oaythsvk.fsf@rustcorp.com.au> Sender: linux-arch-owner@vger.kernel.org List-ID: To: Rusty Russell Cc: "Kirill A. Shutemov" , Andrew Morton , Hugh Dickins , Madhavan Srinivasan "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Rusty Russell wrote: > "Kirill A. Shutemov" writes: > > Andrew Morton wrote: > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >> > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >> > the order of the fault-around size in bytes, and fault_around_pages() > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > >> > >> Yes. And shame on me for missing it (this time!) at review. 
> >> > >> There's still time to fix this. Patches, please. > > > > Here it is. Made at 3.30 AM, build tested only. > > Prefer on top of Maddy's patch which makes it always a variable, rather > than CONFIG_DEBUG_FS. It's got enough hair as it is. Something like this? From: "Kirill A. Shutemov" Date: Tue, 20 May 2014 13:02:03 +0300 Subject: [PATCH] mm: nominate faultaround area in bytes rather than page order There is evidence that the faultaround feature is less relevant on architectures with page size bigger than 4k. Which makes sense since page fault overhead per byte of mapped area should be less there. Let's rework the feature to specify faultaround area in bytes instead of page order. It's 64 kilobytes for now. The patch effectively disables faultaround on architectures with page size >= 64k (like ppc64). It's possible that some other size of faultaround area is relevant for a platform. We can expose `fault_around_bytes' variable to arch-specific code once such platforms are found. Signed-off-by: Kirill A. 
Shutemov --- mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 037b812a9531..252b319e8cdf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +static unsigned long fault_around_bytes = 65536; + +static inline unsigned long fault_around_pages(void) +{ + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; +} + +static inline unsigned long fault_around_mask(void) +{ + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; +} -#ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; -static int fault_around_order_get(void *data, u64 *val) +#ifdef CONFIG_DEBUG_FS +static int fault_around_bytes_get(void *data, u64 *val) { - *val = fault_around_order; + *val = fault_around_bytes; return 0; } -static int fault_around_order_set(void *data, u64 val) +static int fault_around_bytes_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); - if (1UL << val > PTRS_PER_PTE) + if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; - fault_around_order = val; + fault_around_bytes = val; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, - fault_around_order_get, fault_around_order_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, - &fault_around_order_fops); + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, + &fault_around_bytes_fops); if (!ret) - pr_warn("Failed to create fault_around_order in debugfs"); + pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } 
late_initcall(fault_around_debugfs); - -static inline unsigned long fault_around_pages(void) -{ - return 1UL << fault_around_order; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); -} -#else -static inline unsigned long fault_around_pages(void) -{ - unsigned long nr_pages; - - nr_pages = 1UL << FAULT_AROUND_ORDER; - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); - return nr_pages; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); -} #endif static void do_fault_around(struct vm_area_struct *vma, unsigned long address, @@ -3515,7 +3499,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- Kirill A. 
Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 20 May 2014 12:59:56 -0700 Message-ID: <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Return-path: Received: from mail.linuxfoundation.org ([140.211.169.12]:34266 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752647AbaETT75 (ORCPT ); Tue, 20 May 2014 15:59:57 -0400 In-Reply-To: <20140520102738.7F096E009B@blue.fi.intel.com> Sender: linux-arch-owner@vger.kernel.org List-ID: To: "Kirill A. Shutemov" Cc: Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tue, 20 May 2014 13:27:38 +0300 (EEST) "Kirill A. Shutemov" wrote: > Rusty Russell wrote: > > "Kirill A. Shutemov" writes: > > > Andrew Morton wrote: > > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > >> > > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > >> > the order of the fault-around size in bytes, and fault_around_pages() > > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > > >> > > >> Yes. And shame on me for missing it (this time!) at review. > > >> > > >> There's still time to fix this. 
Patches, please. > > > > > > Here it is. Made at 3.30 AM, build tested only. > > > > Prefer on top of Maddy's patch which makes it always a variable, rather > > than CONFIG_DEBUG_FS. It's got enough hair as it is. > > Something like this? This appears to be against mainline, not against Madhavan's patch. As mentioned previously, I'd prefer it that way but confused. > From: "Kirill A. Shutemov" > Date: Tue, 20 May 2014 13:02:03 +0300 > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > There are evidences that faultaround feature is less relevant on > architectures with page size bigger then 4k. Which makes sense since > page fault overhead per byte of mapped area should be less there. > > Let's rework the feature to specify faultaround area in bytes instead of > page order. It's 64 kilobytes for now. > > The patch effectively disables faultaround on architectures with > page size >= 64k (like ppc64). > > It's possible that some other size of faultaround area is relevant for a > platform. We can expose `fault_around_bytes' variable to arch-specific > code once such platforms will be found. > > Signed-off-by: Kirill A. Shutemov > --- > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > 1 file changed, 23 insertions(+), 39 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 037b812a9531..252b319e8cdf 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > update_mmu_cache(vma, address, pte); > } > > -#define FAULT_AROUND_ORDER 4 > +static unsigned long fault_around_bytes = 65536; > + > +static inline unsigned long fault_around_pages(void) > +{ > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > +} I think we should round up, not down. So if the user asks for 1kb, they get one page. 
So this becomes return PAGE_ALIGN(fault_around_bytes) / PAGE_SIZE; > +static inline unsigned long fault_around_mask(void) > +{ > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > +} And this has me a bit stumped. It's not helpful that do_fault_around() is undocumented. Does it fault in N/2 pages ahead and N/2 pages behind? Or does it align the address down to the highest multiple of fault_around_bytes? It appears to be the latter, so the location of the faultaround window around the fault address is basically random, depending on what address userspace happened to pick. I don't know why we did this :( Or something. Can we please get some code commentary over do_fault_around() describing this design decision and explaining the reasoning behind it? Also, "neast" is not a word. From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Kirill A. Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Wed, 21 May 2014 16:40:27 +0300 (EEST) Message-ID: <20140521134027.263DDE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> Sender: owner-linux-mm@kvack.org To: Andrew Morton Cc: "Kirill A. 
Shutemov" , Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org Andrew Morton wrote: > On Tue, 20 May 2014 13:27:38 +0300 (EEST) "Kirill A. Shutemov" wrote: > > > Rusty Russell wrote: > > > "Kirill A. Shutemov" writes: > > > > Andrew Morton wrote: > > > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > > >> > > > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > > >> > the order of the fault-around size in bytes, and fault_around_pages() > > > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > > > >> > > > >> Yes. And shame on me for missing it (this time!) at review. > > > >> > > > >> There's still time to fix this. Patches, please. > > > > > > > > Here it is. Made at 3.30 AM, build tested only. > > > > > > Prefer on top of Maddy's patch which makes it always a variable, rather > > > than CONFIG_DEBUG_FS. It's got enough hair as it is. > > > > Something like this? > > This appears to be against mainline, not against Madhavan's patch. As > mentioned previously, I'd prefer it that way but confused. > > > > From: "Kirill A. Shutemov" > > Date: Tue, 20 May 2014 13:02:03 +0300 > > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > > > There are evidences that faultaround feature is less relevant on > > architectures with page size bigger then 4k. Which makes sense since > > page fault overhead per byte of mapped area should be less there. > > > > Let's rework the feature to specify faultaround area in bytes instead of > > page order. It's 64 kilobytes for now. > > > > The patch effectively disables faultaround on architectures with > > page size >= 64k (like ppc64). 
> > > > It's possible that some other size of faultaround area is relevant for a > > platform. We can expose `fault_around_bytes' variable to arch-specific > > code once such platforms will be found. > > > > Signed-off-by: Kirill A. Shutemov > > --- > > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > > 1 file changed, 23 insertions(+), 39 deletions(-) > > > > diff --git a/mm/memory.c b/mm/memory.c > > index 037b812a9531..252b319e8cdf 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > > update_mmu_cache(vma, address, pte); > > } > > > > -#define FAULT_AROUND_ORDER 4 > > +static unsigned long fault_around_bytes = 65536; > > + > > +static inline unsigned long fault_around_pages(void) > > +{ > > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > > +} > > I think we should round up, not down. So if the user asks for 1kb, > they get one page. > > So this becomes > > return PAGE_ALIGN(fault_around_bytes) / PAGE_SIZE; See below. > > +static inline unsigned long fault_around_mask(void) > > +{ > > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > > +} > > And this has me a bit stumped. It's not helpful that do_fault_around() > is undocumented. Does it fault in N/2 pages ahead and N/2 pages > behind? Or does it align the address down to the highest multiple of > fault_around_bytes? It appears to be the latter, so the location of > the faultaround window around the fault address is basically random, > depending on what address userspace happened to pick. I don't know why > we did this :( When we call ->map_pages() we need to make sure that we stay within VMA and the page table. We don't want to cross page table boundary, because page table is what ptlock covers in split ptlock case. 
I've designed the feature with fault area nominated in page order in mind and I found it's easier to make sure we don't cross boundaries, if we align the virtual address of the fault around area to PAGE_SIZE << FAULT_AROUND_ORDER. And yes, the fault address may be anywhere within the area. You can think about this as a virtual page with size PAGE_SIZE << FAULT_AROUND_ORDER: no matter what the fault address is, we handle the naturally aligned area of that size which the fault address belongs to. I've used rounddown_pow_of_two() in the patch to align to nearest page order, not to page size, because that's what current do_fault_around() expects to see. And roundup is not an option: nobody expects fault around area to be 128k if fault_around_bytes is set to 64k + 1 bytes. If you think we need this I can rework do_fault_around() to handle non-pow-of-two fault_around_pages(), but I don't think it's a good idea to do this for v3.15. Anyway, the patch I've proposed allows changing fault_around_bytes only from DEBUG_FS and rounddown should be good enough there. > Or something. Can we please get some code commentary over > do_fault_around() describing this design decision and explaining the > reasoning behind it? I'll do this. But if do_fault_around() rework is needed, I want to do that first. > Also, "neast" is not a word. :facepalm: From: "Kirill A. Shutemov" Date: Wed, 21 May 2014 16:36:42 +0300 Subject: [PATCH] mm: fix typo in comment in do_fault_around() Signed-off-by: Kirill A. Shutemov --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 252b319e8cdf..f76663c31da6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3460,7 +3460,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, /* * max_pgoff is either end of page table or end of vma - * or fault_around_pages() from pgoff, depending what is neast. + * or fault_around_pages() from pgoff, depending what is nearest.
*/ max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; -- Kirill A. Shutemov -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Wed, 21 May 2014 13:34:08 -0700 Message-ID: <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> <20140521134027.263DDE009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Return-path: Received: from mail.linuxfoundation.org ([140.211.169.12]:49123 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751786AbaEUUeK (ORCPT ); Wed, 21 May 2014 16:34:10 -0400 In-Reply-To: <20140521134027.263DDE009B@blue.fi.intel.com> Sender: linux-arch-owner@vger.kernel.org List-ID: To: "Kirill A. Shutemov" Cc: Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Wed, 21 May 2014 16:40:27 +0300 (EEST) "Kirill A. Shutemov" wrote: > > Or something. 
Can we please get some code commentary over > > do_fault_around() describing this design decision and explaining the > > reasoning behind it? > > I'll do this. But if do_fault_around() rework is needed, I want to do that > first. This sort of thing should be at least partially driven by observation and I don't have the data for that. My seat of the pants feel is that after the first fault, accesses at higher addresses are more common/probable than accesses at lower addresses. In which case we should see improvements by centering the window at some higher address than the fault. Much instrumentation and downstream analysis is needed and the returns will be pretty small! But we don't need to do all that right now. Let's get the current implementation wrapped up for 3.15: get the interface finalized (bytes, not pages!) and get the current design decisions appropriately documented. From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Kirill A. Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Fri, 23 May 2014 15:28:54 +0300 (EEST) Message-ID: <20140523122854.BDB36E009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> <20140521134027.263DDE009B@blue.fi.intel.com> <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> Sender: owner-linux-mm@kvack.org To: Andrew Morton Cc: "Kirill A. 
Shutemov" , Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org Andrew Morton wrote: > On Wed, 21 May 2014 16:40:27 +0300 (EEST) "Kirill A. Shutemov" wrote: > > > > Or something. Can we please get some code commentary over > > > do_fault_around() describing this design decision and explaining the > > > reasoning behind it? > > > > I'll do this. But if do_fault_around() rework is needed, I want to do that > > first. > > This sort of thing should be at least partially driven by observation > and I don't have the data for that. My seat of the pants feel is that > after the first fault, accesses at higher addresses are more > common/probable than accesses at lower addresses. It's probably true for data, but the feature is mostly targeted to code pages and situation is not that obvious to me with all jumps. > But we don't need to do all that right now. Let's get the current > implementation wrapped up for 3.15: get the interface finalized (bytes, > not pages!) The patch above by thread is okay for that, right? > and get the current design decisions appropriately documented. Here it is. Based on patch to convert order->bytes. From: "Kirill A. Shutemov" Date: Fri, 23 May 2014 15:16:47 +0300 Subject: [PATCH] mm: document do_fault_around() feature Some clarification on how faultaround works. Signed-off-by: Kirill A. 
Shutemov --- mm/memory.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 252b319e8cdf..8d723b8d3c86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3404,6 +3404,10 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, static unsigned long fault_around_bytes = 65536; +/* + * fault_around_pages() and fault_around_mask() round down fault_around_bytes + * to nearest page order. It's what do_fault_around() expects to see. + */ static inline unsigned long fault_around_pages(void) { return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; @@ -3445,6 +3449,29 @@ static int __init fault_around_debugfs(void) late_initcall(fault_around_debugfs); #endif +/* + * do_fault_around() tries to map a few pages around the fault address. The hope + * is that the pages will be needed soon and this would lower the number of + * faults to handle. + * + * It uses vm_ops->map_pages() to map the pages, which skips the page if it's + * not ready to be mapped: not up-to-date, locked, etc. + * + * This function is called with the page table lock taken. In the split ptlock + * case the page table lock protects only those entries which belong to + * the page table corresponding to the fault address. + * + * This function doesn't cross the VMA boundaries in order to call map_pages() + * only once. + * + * fault_around_pages() defines how many pages we'll try to map. + * do_fault_around() expects it to be a power of two and less than or equal to + * PTRS_PER_PTE. + * + * The virtual address of the area that we map is naturally aligned to the + * fault_around_pages() (and therefore to page order). This way it's easier to + * guarantee that we don't cross the page table boundaries. + */ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pgoff_t pgoff, unsigned int flags) { -- Kirill A. Shutemov -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org.
For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 27 May 2014 11:54:49 +0530 Message-ID: <53842FB1.7090909@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <20140520102738.7F096E009B@blue.fi.intel.com> Sender: owner-linux-mm@kvack.org To: "Kirill A. Shutemov" , Rusty Russell Cc: Andrew Morton , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: > Rusty Russell wrote: >> "Kirill A. Shutemov" writes: >>> Andrew Morton wrote: >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>> >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>> >>>> Yes. And shame on me for missing it (this time!) at review. >>>> >>>> There's still time to fix this. Patches, please. >>> >>> Here it is. Made at 3.30 AM, build tested only. >> >> Prefer on top of Maddy's patch which makes it always a variable, rather >> than CONFIG_DEBUG_FS. 
It's got enough hair as it is. > > Something like this? > > From: "Kirill A. Shutemov" > Date: Tue, 20 May 2014 13:02:03 +0300 > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > There are evidences that faultaround feature is less relevant on > architectures with page size bigger then 4k. Which makes sense since > page fault overhead per byte of mapped area should be less there. > > Let's rework the feature to specify faultaround area in bytes instead of > page order. It's 64 kilobytes for now. > > The patch effectively disables faultaround on architectures with > page size >= 64k (like ppc64). > > It's possible that some other size of faultaround area is relevant for a > platform. We can expose `fault_around_bytes' variable to arch-specific > code once such platforms will be found. > > Signed-off-by: Kirill A. Shutemov > --- > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > 1 file changed, 23 insertions(+), 39 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 037b812a9531..252b319e8cdf 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > update_mmu_cache(vma, address, pte); > } > > -#define FAULT_AROUND_ORDER 4 > +static unsigned long fault_around_bytes = 65536; > + > +static inline unsigned long fault_around_pages(void) > +{ > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > +} > + > +static inline unsigned long fault_around_mask(void) > +{ > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > +} > > -#ifdef CONFIG_DEBUG_FS > -static unsigned int fault_around_order = FAULT_AROUND_ORDER; > > -static int fault_around_order_get(void *data, u64 *val) > +#ifdef CONFIG_DEBUG_FS > +static int fault_around_bytes_get(void *data, u64 *val) > { > - *val = fault_around_order; > + *val = fault_around_bytes; > return 0; > } > > -static int fault_around_order_set(void *data, 
u64 val) > +static int fault_around_bytes_set(void *data, u64 val) > { Kindly ignore the question if not relevant. Even though we need root access to alter the value, will we be fine with negative value?. Regards Maddy > - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); > - if (1UL << val > PTRS_PER_PTE) > + if (val / PAGE_SIZE > PTRS_PER_PTE) > return -EINVAL; > - fault_around_order = val; > + fault_around_bytes = val; > return 0; > } > -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, > - fault_around_order_get, fault_around_order_set, "%llu\n"); > +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, > + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); > > static int __init fault_around_debugfs(void) > { > void *ret; > > - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, > - &fault_around_order_fops); > + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, > + &fault_around_bytes_fops); > if (!ret) > - pr_warn("Failed to create fault_around_order in debugfs"); > + pr_warn("Failed to create fault_around_bytes in debugfs"); > return 0; > } > late_initcall(fault_around_debugfs); > - > -static inline unsigned long fault_around_pages(void) > -{ > - return 1UL << fault_around_order; > -} > - > -static inline unsigned long fault_around_mask(void) > -{ > - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); > -} > -#else > -static inline unsigned long fault_around_pages(void) > -{ > - unsigned long nr_pages; > - > - nr_pages = 1UL << FAULT_AROUND_ORDER; > - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); > - return nr_pages; > -} > - > -static inline unsigned long fault_around_mask(void) > -{ > - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); > -} > #endif > > static void do_fault_around(struct vm_area_struct *vma, unsigned long address, > @@ -3515,7 +3499,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, > * if page by the offset is not ready to be mapped (cold cache or > * something). 
> */ > - if (vma->vm_ops->map_pages) { > + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { > pte = pte_offset_map_lock(mm, pmd, address, &ptl); > do_fault_around(vma, address, pte, pgoff, flags); > if (!pte_same(*pte, orig_pte)) > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Kirill A. Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 27 May 2014 13:21:59 +0300 (EEST) Message-ID: <20140527102200.012BBE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <53842FB1.7090909@linux.vnet.ibm.com> Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <53842FB1.7090909@linux.vnet.ibm.com> Sender: owner-linux-mm@kvack.org To: Madhavan Srinivasan Cc: "Kirill A. Shutemov" , Rusty Russell , Andrew Morton , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com List-Id: linux-arch.vger.kernel.org Madhavan Srinivasan wrote: > On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: > > Rusty Russell wrote: > >> "Kirill A. 
Shutemov" writes: > >>> Andrew Morton wrote: > >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >>>> > >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >>>>> the order of the fault-around size in bytes, and fault_around_pages() > >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) > >>>> > >>>> Yes. And shame on me for missing it (this time!) at review. > >>>> > >>>> There's still time to fix this. Patches, please. > >>> > >>> Here it is. Made at 3.30 AM, build tested only. > >> > >> Prefer on top of Maddy's patch which makes it always a variable, rather > >> than CONFIG_DEBUG_FS. It's got enough hair as it is. > > > > Something like this? > > > > From: "Kirill A. Shutemov" > > Date: Tue, 20 May 2014 13:02:03 +0300 > > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > > > There are evidences that faultaround feature is less relevant on > > architectures with page size bigger then 4k. Which makes sense since > > page fault overhead per byte of mapped area should be less there. > > > > Let's rework the feature to specify faultaround area in bytes instead of > > page order. It's 64 kilobytes for now. > > > > The patch effectively disables faultaround on architectures with > > page size >= 64k (like ppc64). > > > > It's possible that some other size of faultaround area is relevant for a > > platform. We can expose `fault_around_bytes' variable to arch-specific > > code once such platforms will be found. > > > > Signed-off-by: Kirill A. 
Shutemov > > --- > > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > > 1 file changed, 23 insertions(+), 39 deletions(-) > > > > diff --git a/mm/memory.c b/mm/memory.c > > index 037b812a9531..252b319e8cdf 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > > update_mmu_cache(vma, address, pte); > > } > > > > -#define FAULT_AROUND_ORDER 4 > > +static unsigned long fault_around_bytes = 65536; > > + > > +static inline unsigned long fault_around_pages(void) > > +{ > > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > > +} > > + > > +static inline unsigned long fault_around_mask(void) > > +{ > > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > > +} > > > > -#ifdef CONFIG_DEBUG_FS > > -static unsigned int fault_around_order = FAULT_AROUND_ORDER; > > > > -static int fault_around_order_get(void *data, u64 *val) > > +#ifdef CONFIG_DEBUG_FS > > +static int fault_around_bytes_get(void *data, u64 *val) > > { > > - *val = fault_around_order; > > + *val = fault_around_bytes; > > return 0; > > } > > > > -static int fault_around_order_set(void *data, u64 val) > > +static int fault_around_bytes_set(void *data, u64 val) > > { > > Kindly ignore the question if not relevant. Even though we need root > access to alter the value, will we be fine with > negative value?. val is u64. or I miss something? -- Kirill A. Shutemov -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Tue, 27 May 2014 16:14:05 +0530 Message-ID: <53846C75.10507@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <53842FB1.7090909@linux.vnet.ibm.com> <20140527102200.012BBE009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Return-path: Received: from e28smtp01.in.ibm.com ([122.248.162.1]:49381 "EHLO e28smtp01.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752193AbaE0KoV (ORCPT ); Tue, 27 May 2014 06:44:21 -0400 Received: from /spool/local by e28smtp01.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 27 May 2014 16:14:17 +0530 In-Reply-To: <20140527102200.012BBE009B@blue.fi.intel.com> Sender: linux-arch-owner@vger.kernel.org List-ID: To: "Kirill A. Shutemov" Cc: Rusty Russell , Andrew Morton , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tuesday 27 May 2014 03:51 PM, Kirill A. Shutemov wrote: > Madhavan Srinivasan wrote: >> On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: >>> Rusty Russell wrote: >>>> "Kirill A. 
Shutemov" writes: >>>>> Andrew Morton wrote: >>>>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>>>> >>>>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>>>> >>>>>> Yes. And shame on me for missing it (this time!) at review. >>>>>> >>>>>> There's still time to fix this. Patches, please. >>>>> >>>>> Here it is. Made at 3.30 AM, build tested only. >>>> >>>> Prefer on top of Maddy's patch which makes it always a variable, rather >>>> than CONFIG_DEBUG_FS. It's got enough hair as it is. >>> >>> Something like this? >>> >>> From: "Kirill A. Shutemov" >>> Date: Tue, 20 May 2014 13:02:03 +0300 >>> Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order >>> >>> There are evidences that faultaround feature is less relevant on >>> architectures with page size bigger then 4k. Which makes sense since >>> page fault overhead per byte of mapped area should be less there. >>> >>> Let's rework the feature to specify faultaround area in bytes instead of >>> page order. It's 64 kilobytes for now. >>> >>> The patch effectively disables faultaround on architectures with >>> page size >= 64k (like ppc64). >>> >>> It's possible that some other size of faultaround area is relevant for a >>> platform. We can expose `fault_around_bytes' variable to arch-specific >>> code once such platforms will be found. >>> >>> Signed-off-by: Kirill A. 
Shutemov >>> --- >>> mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- >>> 1 file changed, 23 insertions(+), 39 deletions(-) >>> >>> diff --git a/mm/memory.c b/mm/memory.c >>> index 037b812a9531..252b319e8cdf 100644 >>> --- a/mm/memory.c >>> +++ b/mm/memory.c >>> @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, >>> update_mmu_cache(vma, address, pte); >>> } >>> >>> -#define FAULT_AROUND_ORDER 4 >>> +static unsigned long fault_around_bytes = 65536; >>> + >>> +static inline unsigned long fault_around_pages(void) >>> +{ >>> + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; >>> +} >>> + >>> +static inline unsigned long fault_around_mask(void) >>> +{ >>> + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; >>> +} >>> >>> -#ifdef CONFIG_DEBUG_FS >>> -static unsigned int fault_around_order = FAULT_AROUND_ORDER; >>> >>> -static int fault_around_order_get(void *data, u64 *val) >>> +#ifdef CONFIG_DEBUG_FS >>> +static int fault_around_bytes_get(void *data, u64 *val) >>> { >>> - *val = fault_around_order; >>> + *val = fault_around_bytes; >>> return 0; >>> } >>> >>> -static int fault_around_order_set(void *data, u64 val) >>> +static int fault_around_bytes_set(void *data, u64 val) >>> { >> >> Kindly ignore the question if not relevant. Even though we need root >> access to alter the value, will we be fine with >> negative value?. > ppc > val is u64. or I miss something? > My Bad. What I wanted to check was for all 0xf input and guess we are fine. Sorry about that. Regards Maddy From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp06.in.ibm.com ([122.248.162.6]:43298 "EHLO e28smtp06.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753088AbaEHJ2b (ORCPT ); Thu, 8 May 2014 05:28:31 -0400 Received: from /spool/local by e28smtp06.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Thu, 8 May 2014 14:58:27 +0530 From: Madhavan Srinivasan Subject: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Thu, 8 May 2014 14:58:14 +0530 Message-ID: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Sender: linux-arch-owner@vger.kernel.org List-ID: To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com, Madhavan Srinivasan Message-ID: <20140508092814.EXMv7dz3KcoD_cHOERjTT3R6Mx4DHLpLeC7CJb6MV6A@z> Kirill A. Shutemov with 8c6e50b029 commit introduced vm_ops->map_pages() for mapping easy accessible pages around fault address in hope to reduce number of minor page faults. This patch creates infrastructure to modify the FAULT_AROUND_ORDER value using mm/Kconfig. This will enable architecture maintainers to decide on suitable FAULT_AROUND_ORDER value based on performance data for that architecture. First patch also defaults FAULT_AROUND_ORDER Kconfig element to 4. Second patch list out the performance numbers for powerpc (platform pseries) and initialize the fault around order variable for pseries platform of powerpc. V4 Changes: Replaced the BUILD_BUG_ON with VM_BUG_ON. Moved fault_around_pages() and fault_around_mask() functions outside of #ifdef CONFIG_DEBUG_FS. V3 Changes: Replaced FAULT_AROUND_ORDER macro to a variable to support arch's that supports sub platforms. Made changes in commit messages. V2 Changes: Created Kconfig parameter for FAULT_AROUND_ORDER Added check in do_read_fault to handle FAULT_AROUND_ORDER value of 0 Made changes in commit messages. 
Madhavan Srinivasan (2): mm: move FAULT_AROUND_ORDER to arch/ powerpc/pseries: init fault_around_order for pseries arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 5 +++++ mm/Kconfig | 8 ++++++++ mm/memory.c | 25 ++++++------------------- 4 files changed, 21 insertions(+), 19 deletions(-) -- 1.7.10.4 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp09.in.ibm.com ([122.248.162.9]:49349 "EHLO e28smtp09.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753596AbaEHJ2c (ORCPT ); Thu, 8 May 2014 05:28:32 -0400 Received: from /spool/local by e28smtp09.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Thu, 8 May 2014 14:58:28 +0530 From: Madhavan Srinivasan Subject: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries Date: Thu, 8 May 2014 14:58:16 +0530 Message-ID: <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Sender: linux-arch-owner@vger.kernel.org List-ID: To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com, Madhavan Srinivasan Message-ID: <20140508092816.jHdJW1ePF0vA0-enuPZYuWuRERXhqA5G2po8cU0Uclc@z> Performance data for different FAULT_AROUND_ORDER values from 4 socket Power7 system (128 Threads and 128GB memory). perf stat with repeat of 5 is used to get the stddev values. Test ran in v3.14 kernel (Baseline) and v3.15-rc1 for different fault around order values. %change here is calculated in this method ((new value - baseline)/baseline). 
A negative %change means it's a drop in time. FAULT_AROUND_ORDER Baseline 1 3 4 5 8 Linux build (make -j64) minor-faults 47,437,359 35,279,286 25,425,347 23,461,275 22,002,189 21,435,836 times in seconds 347.302528420 344.061588460 340.974022391 348.193508116 348.673900158 350.986543618 stddev for time ( +- 1.50% ) ( +- 0.73% ) ( +- 1.13% ) ( +- 1.01% ) ( +- 1.89% ) ( +- 1.55% ) %chg time to baseline -0.9% -1.8% 0.2% 0.39% 1.06% Linux rebuild (make -j64) minor-faults 941,552 718,319 486,625 440,124 410,510 397,416 times in seconds 30.569834718 31.219637539 31.319370649 31.434285472 31.972367174 31.443043580 stddev for time ( +- 1.07% ) ( +- 0.13% ) ( +- 0.43% ) ( +- 0.18% ) ( +- 0.95% ) ( +- 0.58% ) %chg time to baseline 2.1% 2.4% 2.8% 4.58% 2.85% Binutils build (make all -j64 ) minor-faults 474,821 371,380 269,463 247,715 235,255 228,337 times in seconds 53.882492432 53.584289348 53.882773216 53.755816431 53.607824348 53.423759642 stddev for time ( +- 0.08% ) ( +- 0.56% ) ( +- 0.17% ) ( +- 0.11% ) ( +- 0.60% ) ( +- 0.69% ) %chg time to baseline -0.55% 0.0% -0.23% -0.51% -0.85% Two synthetic tests: access every word in file in sequential/random order.
Sequential access 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 263,148 131,166 32,908 16,514 8,260 1,093 times in seconds 53.091138345 53.113191672 53.188776177 53.233017218 53.206841347 53.429979442 stddev for time ( +- 0.06% ) ( +- 0.07% ) ( +- 0.08% ) ( +- 0.09% ) ( +- 0.03% ) ( +- 0.03% ) %chg time to baseline 0.04% 0.18% 0.26% 0.21% 0.63% 8 threads minor-faults 2,097,267 1,048,753 262,237 131,397 65,621 8,274 times in seconds 55.173790028 54.591880790 54.824623287 54.802162211 54.969680503 54.790387715 stddev for time ( +- 0.78% ) ( +- 0.09% ) ( +- 0.08% ) ( +- 0.07% ) ( +- 0.28% ) ( +- 0.05% ) %chg time to baseline -1.05% -0.63% -0.67% -0.36% -0.69% 32 threads minor-faults 8,388,751 4,195,621 1,049,664 525,461 262,535 32,924 times in seconds 60.431573046 60.669110744 60.485336388 60.697789706 60.077959564 60.588855032 stddev for time ( +- 0.44% ) ( +- 0.27% ) ( +- 0.46% ) ( +- 0.67% ) ( +- 0.31% ) ( +- 0.49% ) %chg time to baseline 0.39% 0.08% 0.44% -0.58% 0.25% 64 threads minor-faults 16,777,409 8,607,527 2,289,766 1,202,264 598,405 67,587 times in seconds 96.932617720 100.675418760 102.109880836 103.881733383 102.580199555 105.751194041 stddev for time ( +- 1.39% ) ( +- 1.06% ) ( +- 0.99% ) ( +- 0.76% ) ( +- 1.65% ) ( +- 1.60% ) %chg time to baseline 3.86% 5.34% 7.16% 5.82% 9.09% 128 threads minor-faults 33,554,705 17,375,375 4,682,462 2,337,245 1,179,007 134,819 times in seconds 128.766704495 115.659225437 120.353046307 115.291871270 115.450886036 113.991902150 stddev for time ( +- 2.93% ) ( +- 0.30% ) ( +- 2.93% ) ( +- 1.24% ) ( +- 1.03% ) ( +- 0.70% ) %chg time to baseline -10.17% -6.53% -10.46% -10.34% -11.47% Random access 1GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 17,155 8,678 2,126 1,097 581 134 times in seconds 51.904430523 51.658017987 51.919270792 51.560531738 52.354431597 51.976469502 stddev for time ( +- 3.19% ) ( +- 1.35% ) ( +- 1.56% ) ( +- 0.91% ) ( +- 1.70% ) ( +- 2.02% ) %chg time to 
baseline -0.47% 0.02% -0.66% 0.86% 0.13% 8 threads minor-faults 131,844 70,705 17,457 8,505 4,251 598 times in seconds 58.162813956 54.991706305 54.952675791 55.323057492 54.755587379 53.376722828 stddev for time ( +- 1.44% ) ( +- 0.69% ) ( +- 1.23% ) ( +- 2.78% ) ( +- 1.90% ) ( +- 2.91% ) %chg time to baseline -5.45% -5.52% -4.88% -5.86% -8.22% 32 threads minor-faults 524,437 270,760 67,069 33,414 16,641 2,204 times in seconds 69.981777072 76.539570015 79.753578505 76.245943618 77.254258344 79.072596831 stddev for time ( +- 2.81% ) ( +- 1.95% ) ( +- 2.66% ) ( +- 0.99% ) ( +- 2.35% ) ( +- 3.22% ) %chg time to baseline 9.37% 13.96% 8.95% 10.39% 12.98% 64 threads minor-faults 1,049,117 527,451 134,016 66,638 33,391 4,559 times in seconds 108.024517536 117.575067996 115.322659914 111.943998437 115.049450815 119.218450840 stddev for time ( +- 2.40% ) ( +- 1.77% ) ( +- 1.19% ) ( +- 3.29% ) ( +- 2.32% ) ( +- 1.42% ) %chg time to baseline 8.84% 6.75% 3.62% 6.5% 10.3% 128 threads minor-faults 2,097,440 1,054,360 267,042 133,328 66,532 8,652 times in seconds 155.055861167 153.059625968 152.449492156 151.024005282 150.844647770 155.954366718 stddev for time ( +- 1.32% ) ( +- 1.14% ) ( +- 1.32% ) ( +- 0.81% ) ( +- 0.75% ) ( +- 0.72% ) %chg time to baseline -1.28% -1.68% -2.59% -2.71% 0.57% In case of kernel build, fault around order (fao) values of 1 and 3 win when compared to 4 (but a bit noisy). In case of kernel rebuild, a slowdown for fao > 0 is seen. In case of the synthetic tests, there are sporadic gains, but mostly slowdown. No clear sweet spot fao value can be suggested for ppc64/pseries with the current performance data. Hence, the patch suggests a value of zero for the fao. Worst case scenario: we touch one page every 16M to demonstrate overhead.
Touch only one page in page table in 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 1,104 1,090 1,071 1,068 1,065 1,063 times in seconds 0.006583298 0.008531502 0.019733795 0.036033763 0.062300553 0.406857086 stddev for time ( +- 2.79% ) ( +- 2.42% ) ( +- 3.47% ) ( +- 2.81% ) ( +- 2.01% ) ( +- 1.33% ) 8 threads minor-faults 8,279 8,264 8,245 8,243 8,239 8,240 times in seconds 0.044572398 0.057211811 0.107606306 0.205626815 0.381679120 2.647979955 stddev for time ( +- 1.95% ) ( +- 2.98% ) ( +- 1.74% ) ( +- 2.80% ) ( +- 2.01% ) ( +- 1.86% ) 32 threads minor-faults 32,879 32,864 32,849 32,845 32,839 32,843 times in seconds 0.197659343 0.218486087 0.445116407 0.694235883 1.296894038 9.127517045 stddev for time ( +- 3.05% ) ( +- 3.05% ) ( +- 4.33% ) ( +- 3.08% ) ( +- 3.75% ) ( +- 0.56% ) 64 threads minor-faults 65,680 65,664 65,646 65,645 65,640 65,647 times in seconds 0.455537304 0.489688780 0.866490093 1.427393118 2.379628982 17.059295051 stddev for time ( +- 4.01% ) ( +- 4.13% ) ( +- 2.92% ) ( +- 1.68% ) ( +- 1.79% ) ( +- 0.48% ) 128 threads minor-faults 131,279 131,265 131,250 131,245 131,241 131,254 times in seconds 1.026880651 1.095327536 1.721728274 2.808233068 4.662729948 31.732848290 stddev for time ( +- 6.85% ) ( +- 4.09% ) ( +- 1.71% ) ( +- 3.45% ) ( +- 2.40% ) ( +- 0.68% ) Signed-off-by: Madhavan Srinivasan --- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 9921953..6e6c993 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -17,6 +17,8 @@ struct device_node; extern void request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name); +extern unsigned int fault_around_order; + #include extern void __init fw_hypertas_feature_init(const char *hypertas, diff --git 
a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 2db8cc6..4391c3c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -465,6 +465,11 @@ static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* + * Defaulting to zero since no sweet spot value found in the performance test. + */ + fault_around_order = 0; + /* Discover PIC type and setup ppc_md accordingly */ pseries_discover_pic(); -- 1.7.10.4 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp09.in.ibm.com ([122.248.162.9]:49348 "EHLO e28smtp09.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753321AbaEHJ2c (ORCPT ); Thu, 8 May 2014 05:28:32 -0400 Received: from /spool/local by e28smtp09.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Thu, 8 May 2014 14:58:28 +0530 From: Madhavan Srinivasan Subject: [PATCH V4 1/2] mm: move FAULT_AROUND_ORDER to arch/ Date: Thu, 8 May 2014 14:58:15 +0530 Message-ID: <1399541296-18810-2-git-send-email-maddy@linux.vnet.ibm.com> In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Sender: linux-arch-owner@vger.kernel.org List-ID: To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com, Madhavan Srinivasan Message-ID: <20140508092815.ajT-Ox-PCnY27tl7RMn0T1EbhthURTGd8tgCDN__0xE@z> Kirill A. Shutemov with 8c6e50b029 commit introduced vm_ops->map_pages() for mapping easy accessible pages around fault address in hope to reduce number of minor page faults. 
This patch creates infrastructure to modify the FAULT_AROUND_ORDER value using mm/Kconfig. This will enable architecture maintainers to decide on suitable FAULT_AROUND_ORDER value based on performance data for that architecture. Patch also defaults FAULT_AROUND_ORDER Kconfig element to 4. Signed-off-by: Madhavan Srinivasan --- mm/Kconfig | 8 ++++++++ mm/memory.c | 25 ++++++------------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index ebe5880..c7fc4f1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -176,6 +176,14 @@ config MOVABLE_NODE config HAVE_BOOTMEM_INFO_NODE def_bool n +# +# Fault around order is a control knob to decide the fault around pages. +# Default value is set to 4 , but the arch can override it as desired. +# +config FAULT_AROUND_ORDER + int + default 4 + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" diff --git a/mm/memory.c b/mm/memory.c index 037b812..e3931ef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,11 +3402,9 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +unsigned int fault_around_order __read_mostly = CONFIG_FAULT_AROUND_ORDER; #ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; - static int fault_around_order_get(void *data, u64 *val) { *val = fault_around_order; @@ -3415,7 +3413,6 @@ static int fault_around_order_get(void *data, u64 *val) static int fault_around_order_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); if (1UL << val > PTRS_PER_PTE) return -EINVAL; fault_around_order = val; @@ -3435,31 +3432,21 @@ static int __init fault_around_debugfs(void) return 0; } late_initcall(fault_around_debugfs); +#endif static inline unsigned long fault_around_pages(void) { - return 1UL << fault_around_order; -} - -static inline unsigned long fault_around_mask(void) 
-{ - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); -} -#else -static inline unsigned long fault_around_pages(void) -{ unsigned long nr_pages; - nr_pages = 1UL << FAULT_AROUND_ORDER; - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); + nr_pages = 1UL << fault_around_order; + VM_BUG_ON(nr_pages > PTRS_PER_PTE); return nr_pages; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); + return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); } -#endif static void do_fault_around(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pgoff_t pgoff, unsigned int flags) @@ -3515,7 +3502,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if ((vma->vm_ops->map_pages) && fault_around_order) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- 1.7.10.4 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp03.in.ibm.com ([122.248.162.3]:53656 "EHLO e28smtp03.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754843AbaEOIZU (ORCPT ); Thu, 15 May 2014 04:25:20 -0400 Received: from /spool/local by e28smtp03.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Thu, 15 May 2014 13:55:18 +0530 Message-ID: <537479E7.90806@linux.vnet.ibm.com> Date: Thu, 15 May 2014 13:55:11 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Cc: benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140515082511.19H8biK8_OLyU5XOt0Ie5B2lxaT8jZKyrXhx08MxFto@z> Hi Ingo, Do you have any comments for the latest version of the patchset. If not, kindly can you pick it up as is. With regards Maddy > Kirill A. Shutemov with 8c6e50b029 commit introduced > vm_ops->map_pages() for mapping easy accessible pages around > fault address in hope to reduce number of minor page faults. > > This patch creates infrastructure to modify the FAULT_AROUND_ORDER > value using mm/Kconfig. This will enable architecture maintainers > to decide on suitable FAULT_AROUND_ORDER value based on > performance data for that architecture. First patch also defaults > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > out the performance numbers for powerpc (platform pseries) and > initialize the fault around order variable for pseries platform of > powerpc. > > V4 Changes: > Replaced the BUILD_BUG_ON with VM_BUG_ON. > Moved fault_around_pages() and fault_around_mask() functions outside of > #ifdef CONFIG_DEBUG_FS. 
> > V3 Changes: > Replaced FAULT_AROUND_ORDER macro to a variable to support arch's that > supports sub platforms. > Made changes in commit messages. > > V2 Changes: > Created Kconfig parameter for FAULT_AROUND_ORDER > Added check in do_read_fault to handle FAULT_AROUND_ORDER value of 0 > Made changes in commit messages. > > Madhavan Srinivasan (2): > mm: move FAULT_AROUND_ORDER to arch/ > powerpc/pseries: init fault_around_order for pseries > > arch/powerpc/platforms/pseries/pseries.h | 2 ++ > arch/powerpc/platforms/pseries/setup.c | 5 +++++ > mm/Kconfig | 8 ++++++++ > mm/memory.c | 25 ++++++------------------- > 4 files changed, 21 insertions(+), 19 deletions(-) > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f43.google.com ([209.85.220.43]:55627 "EHLO mail-pa0-f43.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752014AbaEOR3f (ORCPT ); Thu, 15 May 2014 13:29:35 -0400 Received: by mail-pa0-f43.google.com with SMTP id hz1so1349393pad.30 for ; Thu, 15 May 2014 10:29:34 -0700 (PDT) Date: Thu, 15 May 2014 10:28:17 -0700 (PDT) From: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <537479E7.90806@linux.vnet.ibm.com> Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Sender: linux-arch-owner@vger.kernel.org List-ID: To: Madhavan Srinivasan Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140515172817.XqPXvwlD1ET4yWxRQoFDsHiS07P6QnJP3aftGNNAavA@z> On Thu, 15 May 2014, Madhavan Srinivasan 
wrote: > > Hi Ingo, > > Do you have any comments for the latest version of the patchset. If > not, kindly can you pick it up as is. > > > With regards > Maddy > > > Kirill A. Shutemov with 8c6e50b029 commit introduced > > vm_ops->map_pages() for mapping easy accessible pages around > > fault address in hope to reduce number of minor page faults. > > > > This patch creates infrastructure to modify the FAULT_AROUND_ORDER > > value using mm/Kconfig. This will enable architecture maintainers > > to decide on suitable FAULT_AROUND_ORDER value based on > > performance data for that architecture. First patch also defaults > > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > > out the performance numbers for powerpc (platform pseries) and > > initialize the fault around order variable for pseries platform of > > powerpc. Sorry for not commenting earlier - just reminded by this ping to Ingo. I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. arch/powerpc/Kconfig suggests that Power supports base page size of 4k, 16k, 64k or 256k. I would expect your optimal fault_around_order to depend very much on the base page size. Perhaps fault_around_size would provide a more useful default? 
Hugh From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ozlabs.org ([103.22.144.67]:57143 "EHLO ozlabs.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751444AbaESBnr (ORCPT ); Sun, 18 May 2014 21:43:47 -0400 From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> Date: Mon, 19 May 2014 09:42:46 +0930 Message-ID: <87wqdik4n5.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Sender: linux-arch-owner@vger.kernel.org List-ID: To: Hugh Dickins , Madhavan Srinivasan Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140519001246.QULuZ39gHj77TV2leTOaKNGdlFsR0A1m1uBrYrQrKXw@z> Hugh Dickins writes: > On Thu, 15 May 2014, Madhavan Srinivasan wrote: >> >> Hi Ingo, >> >> Do you have any comments for the latest version of the patchset. If >> not, kindly can you pick it up as is. >> >> >> With regards >> Maddy >> >> > Kirill A. Shutemov with 8c6e50b029 commit introduced >> > vm_ops->map_pages() for mapping easy accessible pages around >> > fault address in hope to reduce number of minor page faults. >> > >> > This patch creates infrastructure to modify the FAULT_AROUND_ORDER >> > value using mm/Kconfig. This will enable architecture maintainers >> > to decide on suitable FAULT_AROUND_ORDER value based on >> > performance data for that architecture. First patch also defaults >> > FAULT_AROUND_ORDER Kconfig element to 4. 
Second patch list >> > out the performance numbers for powerpc (platform pseries) and >> > initialize the fault around order variable for pseries platform of >> > powerpc. > > Sorry for not commenting earlier - just reminded by this ping to Ingo. > > I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. > > arch/powerpc/Kconfig suggests that Power supports base page size of > 4k, 16k, 64k or 256k. > > I would expect your optimal fault_around_order to depend very much on > the base page size. It was 64k, which is what PPC64 uses on all the major distributions. You really only get a choice of 4k and 64k with 64 bit power. > Perhaps fault_around_size would provide a more useful default? That seems to fit. With 4k pages and order 4, you're asking for 64k. Maddy's result shows 64k is also reasonable for 64k pages. Perhaps we try to generalize from two data points (a slight improvement over doing it from 1!), eg: /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ unsigned int fault_around_order __read_mostly = (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); Cheers, Rusty. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e23smtp04.au.ibm.com ([202.81.31.146]:54813 "EHLO e23smtp04.au.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751897AbaESDGG (ORCPT ); Sun, 18 May 2014 23:06:06 -0400 Received: from /spool/local by e23smtp04.au.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Mon, 19 May 2014 13:06:04 +1000 Message-ID: <53797511.1050409@linux.vnet.ibm.com> Date: Mon, 19 May 2014 08:35:53 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> In-Reply-To: <87wqdik4n5.fsf@rustcorp.com.au> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: Rusty Russell , Hugh Dickins Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140519030553.GqvRl3Y5hKWrpztbW_J39eiXSiliWqSuVmzJViTQTOM@z> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > Hugh Dickins writes: >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >>> >>> Hi Ingo, >>> >>> Do you have any comments for the latest version of the patchset. If >>> not, kindly can you pick it up as is. >>> >>> >>> With regards >>> Maddy >>> >>>> Kirill A. Shutemov with 8c6e50b029 commit introduced >>>> vm_ops->map_pages() for mapping easy accessible pages around >>>> fault address in hope to reduce number of minor page faults. >>>> >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >>>> value using mm/Kconfig. This will enable architecture maintainers >>>> to decide on suitable FAULT_AROUND_ORDER value based on >>>> performance data for that architecture. First patch also defaults >>>> FAULT_AROUND_ORDER Kconfig element to 4. 
Second patch list >>>> out the performance numbers for powerpc (platform pseries) and >>>> initialize the fault around order variable for pseries platform of >>>> powerpc. >> >> Sorry for not commenting earlier - just reminded by this ping to Ingo. >> >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >> >> arch/powerpc/Kconfig suggests that Power supports base page size of >> 4k, 16k, 64k or 256k. >> >> I would expect your optimal fault_around_order to depend very much on >> the base page size. > > It was 64k, which is what PPC64 uses on all the major distributions. > You really only get a choice of 4k and 64k with 64 bit power. > This is true. PPC64 support multiple pagesize and yes the default page size of 64k, is taken as base pagesize for the tests. >> Perhaps fault_around_size would provide a more useful default? > > That seems to fit. With 4k pages and order 4, you're asking for 64k. > Maddy's result shows 64k is also reasonable for 64k pages. > > Perhaps we try to generalize from two data points (a slight improvement > over doing it from 1!), eg: > > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > unsigned int fault_around_order __read_mostly = > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > This may be right. But these are the concerns, will not this make other arch to pick default without any tuning and also this will remove the compile time option to disable the feature? Thanks for review With regards Maddy > Cheers, > Rusty. 
> From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f44.google.com ([209.85.220.44]:47088 "EHLO mail-pa0-f44.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751256AbaESXYY (ORCPT ); Mon, 19 May 2014 19:24:24 -0400 Received: by mail-pa0-f44.google.com with SMTP id ld10so6453358pab.31 for ; Mon, 19 May 2014 16:24:23 -0700 (PDT) Date: Mon, 19 May 2014 16:23:07 -0700 (PDT) From: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <53797511.1050409@linux.vnet.ibm.com> Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Sender: linux-arch-owner@vger.kernel.org List-ID: To: Madhavan Srinivasan Cc: Rusty Russell , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140519232307.5nS_YHvtscgRhH5F64SRyMbN2b-tgL4D5q9s2f2QMbU@z> On Mon, 19 May 2014, Madhavan Srinivasan wrote: > On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > > Hugh Dickins writes: > >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: > >>> > >>> Hi Ingo, > >>> > >>> Do you have any comments for the latest version of the patchset. If > >>> not, kindly can you pick it up as is. > >>> > >>> > >>> With regards > >>> Maddy > >>> > >>>> Kirill A. Shutemov with 8c6e50b029 commit introduced > >>>> vm_ops->map_pages() for mapping easy accessible pages around > >>>> fault address in hope to reduce number of minor page faults. 
> >>>> > >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER > >>>> value using mm/Kconfig. This will enable architecture maintainers > >>>> to decide on suitable FAULT_AROUND_ORDER value based on > >>>> performance data for that architecture. First patch also defaults > >>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > >>>> out the performance numbers for powerpc (platform pseries) and > >>>> initialize the fault around order variable for pseries platform of > >>>> powerpc. > >> > >> Sorry for not commenting earlier - just reminded by this ping to Ingo. > >> > >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. > >> > >> arch/powerpc/Kconfig suggests that Power supports base page size of > >> 4k, 16k, 64k or 256k. > >> > >> I would expect your optimal fault_around_order to depend very much on > >> the base page size. > > > > It was 64k, which is what PPC64 uses on all the major distributions. > > You really only get a choice of 4k and 64k with 64 bit power. > > > This is true. PPC64 support multiple pagesize and yes the default page > size of 64k, is taken as base pagesize for the tests. > > >> Perhaps fault_around_size would provide a more useful default? > > > > That seems to fit. With 4k pages and order 4, you're asking for 64k. > > Maddy's result shows 64k is also reasonable for 64k pages. > > > > Perhaps we try to generalize from two data points (a slight improvement > > over doing it from 1!), eg: > > > > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > > unsigned int fault_around_order __read_mostly = > > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); Rusty's bimodal answer doesn't seem the right starting point to me. Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be the order of the fault-around size in bytes, and fault_around_pages() use 1UL << (fault_around_order - PAGE_SHIFT) - when that doesn't wrap, of course! 
That would at least have a better chance of being appropriate for architectures with 8k and 16k pages (Itanium springs to mind). Not necessarily right for them, since each architecture may have different faulting overheads; but a better chance of being right than blindly assuming 4k or 64k pages for everyone. I'd be glad to see that change go into v3.15: what do you think, Kirill, are we too late to make such a change now? Or do you see some objection to it? > This may be right. But these are the concerns, will not this make other > arch to pick default without any tuning Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? Did other architectures, with other page sizes, back that default? Clearly not powerpc. > and also this will remove the > compile time option to disable the feature? Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig for v3.16? I'm not sure whether Rusty was arguing against that or not. I think we are all three concerned to have a more sensible default than what's there at present. I don't feel very strongly about your Kconfig option: I've no objection, if it were to default to byte order 16. 
Hugh From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org ([140.211.169.12]:50654 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751275AbaESXnD (ORCPT ); Mon, 19 May 2014 19:43:03 -0400 Date: Mon, 19 May 2014 16:43:01 -0700 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-ID: <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: Hugh Dickins Cc: Madhavan Srinivasan , Rusty Russell , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140519234301.0QAstrhkTC-Qgfvh1qmRbrpljBFCn3KzoKOLJJ-9gAs@z> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) Yes. And shame on me for missing it (this time!) at review. There's still time to fix this. Patches, please. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com ([134.134.136.24]:38274 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750758AbaETAoh (ORCPT ); Mon, 19 May 2014 20:44:37 -0400 From: "Kirill A. 
Shutemov" In-Reply-To: <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Content-Transfer-Encoding: 7bit Message-ID: <20140520004429.E660AE009B@blue.fi.intel.com> Date: Tue, 20 May 2014 03:44:29 +0300 (EEST) Sender: linux-arch-owner@vger.kernel.org List-ID: To: Andrew Morton Cc: Hugh Dickins , Madhavan Srinivasan , Rusty Russell , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520004429.lAyFfSp7jcMplfdiMuR73y8ZHZxXwXniwIwRYRyNKjQ@z> Andrew Morton wrote: > On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > the order of the fault-around size in bytes, and fault_around_pages() > > use 1UL << (fault_around_order - PAGE_SHIFT) > > Yes. And shame on me for missing it (this time!) at review. > > There's still time to fix this. Patches, please. Here it is. Made at 3.30 AM, build tested only. I'll sign it off tomorrow after testing. 
diff --git a/mm/memory.c b/mm/memory.c index 037b812a9531..9d6941c9a9e4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,62 +3402,62 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +#define FAULT_AROUND_BYTES 65536 #ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; +static unsigned int fault_around_bytes = FAULT_AROUND_BYTES; -static int fault_around_order_get(void *data, u64 *val) +static int fault_around_bytes_get(void *data, u64 *val) { - *val = fault_around_order; + *val = fault_around_bytes; return 0; } -static int fault_around_order_set(void *data, u64 val) +static int fault_around_bytes_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); - if (1UL << val > PTRS_PER_PTE) + BUILD_BUG_ON(FAULT_AROUND_BYTES / PAGE_SIZE > PTRS_PER_PTE); + if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; - fault_around_order = val; + fault_around_bytes = val; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, - fault_around_order_get, fault_around_order_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, - &fault_around_order_fops); + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, + &fault_around_bytes_fops); if (!ret) - pr_warn("Failed to create fault_around_order in debugfs"); + pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } late_initcall(fault_around_debugfs); static inline unsigned long fault_around_pages(void) { - return 1UL << fault_around_order; + return fault_around_bytes / PAGE_SIZE; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); + return ~(round_down(fault_around_bytes, PAGE_SIZE) - 1); } 
#else static inline unsigned long fault_around_pages(void) { unsigned long nr_pages; - nr_pages = 1UL << FAULT_AROUND_ORDER; + nr_pages = FAULT_AROUND_BYTES / PAGE_SIZE; BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); return nr_pages; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); + return ~(round_down(FAULT_AROUND_BYTES, PAGE_SIZE) - 1); } #endif @@ -3515,7 +3515,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e23smtp06.au.ibm.com ([202.81.31.148]:32894 "EHLO e23smtp06.au.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752500AbaETCHD (ORCPT ); Mon, 19 May 2014 22:07:03 -0400 Received: from /spool/local by e23smtp06.au.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 20 May 2014 12:07:00 +1000 Message-ID: <537AB8B2.3040000@linux.vnet.ibm.com> Date: Tue, 20 May 2014 07:36:42 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> In-Reply-To: Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: Hugh Dickins Cc: Rusty Russell , "Kirill A. 
Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520020642.4sTPpIZmtzUjfnNBe6X9aKWxofLKliFR66Qc10xpWuk@z> On Tuesday 20 May 2014 04:53 AM, Hugh Dickins wrote: > On Mon, 19 May 2014, Madhavan Srinivasan wrote: >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: >>> Hugh Dickins writes: >>>> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >>>>> >>>>> Hi Ingo, >>>>> >>>>> Do you have any comments for the latest version of the patchset. If >>>>> not, kindly can you pick it up as is. >>>>> >>>>> >>>>> With regards >>>>> Maddy >>>>> >>>>>> Kirill A. Shutemov with 8c6e50b029 commit introduced >>>>>> vm_ops->map_pages() for mapping easy accessible pages around >>>>>> fault address in hope to reduce number of minor page faults. >>>>>> >>>>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >>>>>> value using mm/Kconfig. This will enable architecture maintainers >>>>>> to decide on suitable FAULT_AROUND_ORDER value based on >>>>>> performance data for that architecture. First patch also defaults >>>>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >>>>>> out the performance numbers for powerpc (platform pseries) and >>>>>> initialize the fault around order variable for pseries platform of >>>>>> powerpc. >>>> >>>> Sorry for not commenting earlier - just reminded by this ping to Ingo. >>>> >>>> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >>>> >>>> arch/powerpc/Kconfig suggests that Power supports base page size of >>>> 4k, 16k, 64k or 256k. >>>> >>>> I would expect your optimal fault_around_order to depend very much on >>>> the base page size. >>> >>> It was 64k, which is what PPC64 uses on all the major distributions. 
>>> You really only get a choice of 4k and 64k with 64 bit power. >>> >> This is true. PPC64 support multiple pagesize and yes the default page >> size of 64k, is taken as base pagesize for the tests. >> >>>> Perhaps fault_around_size would provide a more useful default? >>> >>> That seems to fit. With 4k pages and order 4, you're asking for 64k. >>> Maddy's result shows 64k is also reasonable for 64k pages. >>> >>> Perhaps we try to generalize from two data points (a slight improvement >>> over doing it from 1!), eg: >>> >>> /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ >>> unsigned int fault_around_order __read_mostly = >>> (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > Rusty's bimodal answer doesn't seem the right starting point to me. > > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) > - when that doesn't wrap, of course! > > That would at least have a better chance of being appropriate for > architectures with 8k and 16k pages (Itanium springs to mind). > > Not necessarily right for them, since each architecture may have > different faulting overheads; but a better chance of being right > than blindly assuming 4k or 64k pages for everyone. > > I'd be glad to see that change go into v3.15: what do you think, > Kirill, are we too late to make such a change now? > Or do you see some objection to it? > >> This may be right. But these are the concerns, will not this make other >> arch to pick default without any tuning > > Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? > Did other architectures, with other page sizes, back that default? > Clearly not powerpc. Ok. > >> and also this will remove the >> compile time option to disable the feature? > > Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig > for v3.16? 
> > I'm not sure whether Rusty was arguing against that or not. I think > we are all three concerned to have a more sensible default than what's > there at present. I don't feel very strongly about your Kconfig Added it as one way to reset or disable the default value. But then I guess we decided on having FAULT_AROUND_ORDER as a variable which is more important than Kconfig option. > option: I've no objection, if it were to default to byte order 16. > Thanks for review With regards Maddy > Hugh > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ozlabs.org ([103.22.144.67]:43673 "EHLO ozlabs.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750870AbaETCKG (ORCPT ); Mon, 19 May 2014 22:10:06 -0400 From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Date: Tue, 20 May 2014 10:44:06 +0930 Message-ID: <87d2f9jlpd.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Sender: linux-arch-owner@vger.kernel.org List-ID: To: Hugh Dickins , Madhavan Srinivasan Cc: "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520011406.oQEn0NLREqIhe-adG7vZrl3rVtmdASRQu7zJPPEIhTI@z> Hugh Dickins writes: > On Mon, 19 May 2014, Madhavan Srinivasan wrote: >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: >> > Hugh Dickins writes: >> >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >> >>> >> >>> Hi Ingo, >> >>> >> >>> Do you have any comments for the latest version of the patchset.
If >> >>> not, kindly can you pick it up as is. >> >>> >> >>> >> >>> With regards >> >>> Maddy >> >>> >> >>>> Kirill A. Shutemov with 8c6e50b029 commit introduced >> >>>> vm_ops->map_pages() for mapping easy accessible pages around >> >>>> fault address in hope to reduce number of minor page faults. >> >>>> >> >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >> >>>> value using mm/Kconfig. This will enable architecture maintainers >> >>>> to decide on suitable FAULT_AROUND_ORDER value based on >> >>>> performance data for that architecture. First patch also defaults >> >>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >> >>>> out the performance numbers for powerpc (platform pseries) and >> >>>> initialize the fault around order variable for pseries platform of >> >>>> powerpc. >> >> >> >> Sorry for not commenting earlier - just reminded by this ping to Ingo. >> >> >> >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >> >> >> >> arch/powerpc/Kconfig suggests that Power supports base page size of >> >> 4k, 16k, 64k or 256k. >> >> >> >> I would expect your optimal fault_around_order to depend very much on >> >> the base page size. >> > >> > It was 64k, which is what PPC64 uses on all the major distributions. >> > You really only get a choice of 4k and 64k with 64 bit power. >> > >> This is true. PPC64 support multiple pagesize and yes the default page >> size of 64k, is taken as base pagesize for the tests. >> >> >> Perhaps fault_around_size would provide a more useful default? >> > >> > That seems to fit. With 4k pages and order 4, you're asking for 64k. >> > Maddy's result shows 64k is also reasonable for 64k pages. >> > >> > Perhaps we try to generalize from two data points (a slight improvement >> > over doing it from 1!), eg: >> > >> > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ >> > unsigned int fault_around_order __read_mostly = >> > (16 - PAGE_SHIFT < 0 ? 
0 : 16 - PAGE_SHIFT); > > Rusty's bimodal answer doesn't seem the right starting point to me. ? It's not bimodal, it's graded. I think you misread? > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) > - when that doesn't wrap, of course! > > That would at least have a better chance of being appropriate for > architectures with 8k and 16k pages (Itanium springs to mind). Well, from our two data points it seems that we want to fault in 64k at a time whatever our page size. Perhaps it's clearer if the code expresses itself that way. > Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? > Did other architectures, with other page sizes, back that default? > Clearly not powerpc. Yeah, BenH flagged it as "we should test this" for powerpc, which is what Maddy then did. >> and also this will remove the >> compile time option to disable the feature? > > Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig > for v3.16? > > I'm not sure whether Rusty was arguing against that or not. I think > we are all three concerned to have a more sensible default than what's > there at present. I don't feel very strongly about your Kconfig > option: I've no objection, if it were to default to byte order 16. I don't mind either. Cheers, Rusty. 
From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pb0-f44.google.com ([209.85.160.44]:47666 "EHLO mail-pb0-f44.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751515AbaETCfo (ORCPT ); Mon, 19 May 2014 22:35:44 -0400 Received: by mail-pb0-f44.google.com with SMTP id rq2so6590115pbb.31 for ; Mon, 19 May 2014 19:35:44 -0700 (PDT) Date: Mon, 19 May 2014 19:34:27 -0700 (PDT) From: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <87d2f9jlpd.fsf@rustcorp.com.au> Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <87d2f9jlpd.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Sender: linux-arch-owner@vger.kernel.org List-ID: To: Rusty Russell Cc: Madhavan Srinivasan , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520023427.HTLw4DCHhvW2IPLw-HE4kcxMbh1uKvOQ5BXpndg3CXM@z> On Tue, 20 May 2014, Rusty Russell wrote: > Hugh Dickins writes: > >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > >> > > >> > Perhaps we try to generalize from two data points (a slight improvement > >> > over doing it from 1!), eg: > >> > > >> > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > >> > unsigned int fault_around_order __read_mostly = > >> > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > > > Rusty's bimodal answer doesn't seem the right starting point to me. > > ? It's not bimodal, it's graded. I think you misread? 
Yikes, worse than misread, more like I was too rude even to read: sorry! Hugh From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ozlabs.org ([103.22.144.67]:42378 "EHLO ozlabs.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750969AbaETHRJ (ORCPT ); Tue, 20 May 2014 03:17:09 -0400 From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <20140520004429.E660AE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> Date: Tue, 20 May 2014 15:52:07 +0930 Message-ID: <87oaythsvk.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Sender: linux-arch-owner@vger.kernel.org List-ID: To: "Kirill A. Shutemov" , Andrew Morton Cc: Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520062207.LHP3agVeEtnBlt0CDfHO1RxC7ZdCXbs3RiiJWGyrYr0@z> "Kirill A. Shutemov" writes: > Andrew Morton wrote: >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >> >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >> > the order of the fault-around size in bytes, and fault_around_pages() >> > use 1UL << (fault_around_order - PAGE_SHIFT) >> >> Yes. And shame on me for missing it (this time!) at review. >> >> There's still time to fix this. Patches, please. > > Here it is. Made at 3.30 AM, build tested only. Prefer on top of Maddy's patch which makes it always a variable, rather than CONFIG_DEBUG_FS. 
It's got enough hair as it is. Cheers, Rusty. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org ([140.211.169.12]:53609 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751824AbaETH24 (ORCPT ); Tue, 20 May 2014 03:28:56 -0400 Date: Tue, 20 May 2014 00:28:34 -0700 From: Andrew Morton Subject: Re: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries Message-ID: <20140520002834.aefb5a90.akpm@linux-foundation.org> In-Reply-To: <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: Madhavan Srinivasan Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520072834.-Q2YigybrOPVCK_AUyQZTCAu8hzY8mr5cfOrZJ3qirc@z> On Thu, 8 May 2014 14:58:16 +0530 Madhavan Srinivasan wrote: > --- a/arch/powerpc/platforms/pseries/pseries.h > +++ b/arch/powerpc/platforms/pseries/pseries.h > @@ -17,6 +17,8 @@ struct device_node; > extern void request_event_sources_irqs(struct device_node *np, > irq_handler_t handler, const char *name); > > +extern unsigned int fault_around_order; This isn't an appropriate header file for exporting something from core mm - what happens if arch/mn10300 wants it? I guess include/linux/mm.h is the place.
From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org ([140.211.169.12]:53632 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751733AbaETHcX (ORCPT ); Tue, 20 May 2014 03:32:23 -0400 Date: Tue, 20 May 2014 00:32:01 -0700 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-ID: <20140520003201.a2360d5d.akpm@linux-foundation.org> In-Reply-To: <87oaythsvk.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: Rusty Russell Cc: "Kirill A. Shutemov" , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520073201.qxtJQ2dlNRVdqYZDS7X-jhM_IbwXYn4uGIxWND87uOA@z> On Tue, 20 May 2014 15:52:07 +0930 Rusty Russell wrote: > "Kirill A. Shutemov" writes: > > Andrew Morton wrote: > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >> > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >> > the order of the fault-around size in bytes, and fault_around_pages() > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > >> > >> Yes. And shame on me for missing it (this time!) at review. > >> > >> There's still time to fix this. Patches, please. > > > > Here it is. 
Made at 3.30 AM, build tested only. > > Prefer on top of Maddy's patch which makes it always a variable, rather > than CONFIG_DEBUG_FS. It's got enough hair as it is. > We're at 3.15-rc5 and this interface should be finalised for 3.16. So Kirill's patch is pretty urgent and should come first. Well. It's only a debugfs interface at this stage so we are allowed to change it later, but it's better not to. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp05.in.ibm.com ([122.248.162.5]:50228 "EHLO e28smtp05.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752240AbaETIEI (ORCPT ); Tue, 20 May 2014 04:04:08 -0400 Received: from /spool/local by e28smtp05.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 20 May 2014 13:34:05 +0530 Message-ID: <537B0C6E.4030501@linux.vnet.ibm.com> Date: Tue, 20 May 2014 13:33:58 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> <20140520002834.aefb5a90.akpm@linux-foundation.org> In-Reply-To: <20140520002834.aefb5a90.akpm@linux-foundation.org> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: Andrew Morton Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140520080358.xL4x9GIqshgB7edJvzHYb4ldZc4XCzgU_ZuNbxFp-OI@z> On Tuesday 20 May 2014 12:58 PM, Andrew Morton wrote: > On Thu, 8 May 2014 14:58:16 +0530 Madhavan Srinivasan wrote: > >> ---
a/arch/powerpc/platforms/pseries/pseries.h >> +++ b/arch/powerpc/platforms/pseries/pseries.h >> @@ -17,6 +17,8 @@ struct device_node; >> extern void request_event_sources_irqs(struct device_node *np, >> irq_handler_t handler, const char *name); >> >> +extern unsigned int fault_around_order; > > This isn't an appropriate header file for exporting something from core > mm - what happens if arch/mn10300 wants it?. > > I guess include/linux/mm.h is the place. > Rusty already suggested this. My bad. Reason for adding it here was that, I did the performance test for this platform. Will change and send it out. Thanks for review Regards Maddy From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com ([134.134.136.24]:4927 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751033AbaEUNkk (ORCPT ); Wed, 21 May 2014 09:40:40 -0400 From: "Kirill A. Shutemov" In-Reply-To: <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Content-Transfer-Encoding: 7bit Message-ID: <20140521134027.263DDE009B@blue.fi.intel.com> Date: Wed, 21 May 2014 16:40:27 +0300 (EEST) Sender: linux-arch-owner@vger.kernel.org List-ID: To: Andrew Morton Cc: "Kirill A. 
Shutemov" , Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140521134027.C-G8D1xQtm-IOiVMcpO_Rlk0UoDzv82zd46LTzAU9wM@z> Andrew Morton wrote: > On Tue, 20 May 2014 13:27:38 +0300 (EEST) "Kirill A. Shutemov" wrote: > > > Rusty Russell wrote: > > > "Kirill A. Shutemov" writes: > > > > Andrew Morton wrote: > > > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > > >> > > > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > > >> > the order of the fault-around size in bytes, and fault_around_pages() > > > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > > > >> > > > >> Yes. And shame on me for missing it (this time!) at review. > > > >> > > > >> There's still time to fix this. Patches, please. > > > > > > > > Here it is. Made at 3.30 AM, build tested only. > > > > > > Prefer on top of Maddy's patch which makes it always a variable, rather > > > than CONFIG_DEBUG_FS. It's got enough hair as it is. > > > > Something like this? > > This appears to be against mainline, not against Madhavan's patch. As > mentioned previously, I'd prefer it that way but confused. > > > > From: "Kirill A. Shutemov" > > Date: Tue, 20 May 2014 13:02:03 +0300 > > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > > > There are evidences that faultaround feature is less relevant on > > architectures with page size bigger then 4k. Which makes sense since > > page fault overhead per byte of mapped area should be less there. > > > > Let's rework the feature to specify faultaround area in bytes instead of > > page order. It's 64 kilobytes for now. 
> > > > The patch effectively disables faultaround on architectures with > > page size >= 64k (like ppc64). > > > > It's possible that some other size of faultaround area is relevant for a > > platform. We can expose `fault_around_bytes' variable to arch-specific > > code once such platforms will be found. > > > > Signed-off-by: Kirill A. Shutemov > > --- > > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > > 1 file changed, 23 insertions(+), 39 deletions(-) > > > > diff --git a/mm/memory.c b/mm/memory.c > > index 037b812a9531..252b319e8cdf 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > > update_mmu_cache(vma, address, pte); > > } > > > > -#define FAULT_AROUND_ORDER 4 > > +static unsigned long fault_around_bytes = 65536; > > + > > +static inline unsigned long fault_around_pages(void) > > +{ > > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > > +} > > I think we should round up, not down. So if the user asks for 1kb, > they get one page. > > So this becomes > > return PAGE_ALIGN(fault_around_bytes) / PAGE_SIZE; See below. > > +static inline unsigned long fault_around_mask(void) > > +{ > > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > > +} > > And this has me a bit stumped. It's not helpful that do_fault_around() > is undocumented. Does it fault in N/2 pages ahead and N/2 pages > behind? Or does it align the address down to the highest multiple of > fault_around_bytes? It appears to be the latter, so the location of > the faultaround window around the fault address is basically random, > depending on what address userspace happened to pick. I don't know why > we did this :( When we call ->map_pages() we need to make sure that we stay within VMA and the page table. We don't want to cross page table boundary, because page table is what ptlock covers in split ptlock case. 
I've designed the feature with fault area nominated in page order in mind and I found it's easier to make sure we don't cross boundaries, if we would align virtual address of fault around area to PAGE_SIZE << FAULT_AROUND_ORDER. And yes fault address may be anywhere within the area. You can think about this as a virtual page with size PAGE_SIZE << FAULT_AROUND_ORDER: no matter what is fault address, we handle area naturally aligned to page size which fault address belong to. I've used rounddown_pow_of_two() in the patch to align to nearest page order, not to page size, because that's what current do_fault_around() expect to see. And roundup is not an option: nobody expects fault around area to be 128k if fault_around_bytes set to 64k + 1 bytes. If you think we need this I can rework do_fault_around() to handle non-pow-of-two fault_around_pages(), but I don't think it's good idea to do this for v3.15. Anyway, patch I've proposed allows change fault_around_bytes only from DEBUG_FS and roundown should be good enough there. > Or something. Can we please get some code commentary over > do_fault_around() describing this design decision and explaining the > reasoning behind it? I'll do this. But if do_fault_around() rework is needed, I want to do that first. > Also, "neast" is not a word. :facepalm: From: "Kirill A. Shutemov" Date: Wed, 21 May 2014 16:36:42 +0300 Subject: [PATCH] mm: fix typo in comment in do_fault_around() Signed-off-by: Kirill A. Shutemov --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 252b319e8cdf..f76663c31da6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3460,7 +3460,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, /* * max_pgoff is either end of page table or end of vma - * or fault_around_pages() from pgoff, depending what is neast. + * or fault_around_pages() from pgoff, depending what is nearest. 
*/ max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga01.intel.com ([192.55.52.88]:23986 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750981AbaEWM3V (ORCPT ); Fri, 23 May 2014 08:29:21 -0400 From: "Kirill A. Shutemov" In-Reply-To: <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> <20140521134027.263DDE009B@blue.fi.intel.com> <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Content-Transfer-Encoding: 7bit Message-ID: <20140523122854.BDB36E009B@blue.fi.intel.com> Date: Fri, 23 May 2014 15:28:54 +0300 (EEST) Sender: linux-arch-owner@vger.kernel.org List-ID: To: Andrew Morton Cc: "Kirill A. Shutemov" , Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140523122854.zqEjtozbAxhNuVZwOiFnMcUwZ0fYn0IeRdr70ogiMoo@z> Andrew Morton wrote: > On Wed, 21 May 2014 16:40:27 +0300 (EEST) "Kirill A. Shutemov" wrote: > > > > Or something. Can we please get some code commentary over > > > do_fault_around() describing this design decision and explaining the > > > reasoning behind it? 
> > > > I'll do this. But if do_fault_around() rework is needed, I want to do that > > first. > > This sort of thing should be at least partially driven by observation > and I don't have the data for that. My seat of the pants feel is that > after the first fault, accesses at higher addresses are more > common/probable than accesses at lower addresses. It's probably true for data, but the feature is mostly targeted to code pages and situation is not that obvious to me with all jumps. > But we don't need to do all that right now. Let's get the current > implementation wrapped up for 3.15: get the interface finalized (bytes, > not pages!) The patch above by thread is okay for that, right? > and get the current design decisions appropriately documented. Here it is. Based on patch to convert order->bytes. From: "Kirill A. Shutemov" Date: Fri, 23 May 2014 15:16:47 +0300 Subject: [PATCH] mm: document do_fault_around() feature Some clarification on how faultaround works. Signed-off-by: Kirill A. Shutemov --- mm/memory.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 252b319e8cdf..8d723b8d3c86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3404,6 +3404,10 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, static unsigned long fault_around_bytes = 65536; +/* + * fault_around_pages() and fault_around_mask() round down fault_around_bytes + * to nearest page order. It's what do_fault_around() expects to see. + */ static inline unsigned long fault_around_pages(void) { return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; @@ -3445,6 +3449,29 @@ static int __init fault_around_debugfs(void) late_initcall(fault_around_debugfs); #endif +/* + * do_fault_around() tries to map few pages around the fault address. The hope + * is that the pages will be needed soon and this would lower the number of + * faults to handle. 
+ * + * It uses vm_ops->map_pages() to map the pages, which skips the page if it's + * not ready to be mapped: not up-to-date, locked, etc. + * + * This function is called with the page table lock taken. In the split ptlock + * case the page table lock only protects only those entries which belong to + * page table corresponding to the fault address. + * + * This function don't cross the VMA boundaries in order to call map_pages() + * only once. + * + * fault_around_pages() defines how many pages we'll try to map. + * do_fault_around() expects it to be power of two and less or equal to + * PTRS_PER_PTE. + * + * The virtual address of the area that we map is naturally aligned to the + * fault_around_pages() (and therefore to page order). This way it's easier to + * guarantee that we don't cross the page table boundaries. + */ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pgoff_t pgoff, unsigned int flags) { -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp09.in.ibm.com ([122.248.162.9]:51266 "EHLO e28smtp09.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751977AbaE0GZD (ORCPT ); Tue, 27 May 2014 02:25:03 -0400 Received: from /spool/local by e28smtp09.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Tue, 27 May 2014 11:54:58 +0530 Message-ID: <53842FB1.7090909@linux.vnet.ibm.com> Date: Tue, 27 May 2014 11:54:49 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> In-Reply-To: <20140520102738.7F096E009B@blue.fi.intel.com> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-arch-owner@vger.kernel.org List-ID: To: "Kirill A. Shutemov" , Rusty Russell Cc: Andrew Morton , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140527062449.WnyFEgYU0VOVV5aRkQE4EV5p6FxIaT70cdd3FuOu2zM@z> On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: > Rusty Russell wrote: >> "Kirill A. Shutemov" writes: >>> Andrew Morton wrote: >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>> >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>> >>>> Yes. And shame on me for missing it (this time!) at review. >>>> >>>> There's still time to fix this. Patches, please. >>> >>> Here it is. Made at 3.30 AM, build tested only. >> >> Prefer on top of Maddy's patch which makes it always a variable, rather >> than CONFIG_DEBUG_FS. 
It's got enough hair as it is. > > Something like this? > > From: "Kirill A. Shutemov" > Date: Tue, 20 May 2014 13:02:03 +0300 > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > There are evidences that faultaround feature is less relevant on > architectures with page size bigger then 4k. Which makes sense since > page fault overhead per byte of mapped area should be less there. > > Let's rework the feature to specify faultaround area in bytes instead of > page order. It's 64 kilobytes for now. > > The patch effectively disables faultaround on architectures with > page size >= 64k (like ppc64). > > It's possible that some other size of faultaround area is relevant for a > platform. We can expose `fault_around_bytes' variable to arch-specific > code once such platforms will be found. > > Signed-off-by: Kirill A. Shutemov > --- > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > 1 file changed, 23 insertions(+), 39 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 037b812a9531..252b319e8cdf 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > update_mmu_cache(vma, address, pte); > } > > -#define FAULT_AROUND_ORDER 4 > +static unsigned long fault_around_bytes = 65536; > + > +static inline unsigned long fault_around_pages(void) > +{ > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > +} > + > +static inline unsigned long fault_around_mask(void) > +{ > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > +} > > -#ifdef CONFIG_DEBUG_FS > -static unsigned int fault_around_order = FAULT_AROUND_ORDER; > > -static int fault_around_order_get(void *data, u64 *val) > +#ifdef CONFIG_DEBUG_FS > +static int fault_around_bytes_get(void *data, u64 *val) > { > - *val = fault_around_order; > + *val = fault_around_bytes; > return 0; > } > > -static int fault_around_order_set(void *data, 
u64 val) > +static int fault_around_bytes_set(void *data, u64 val) > { Kindly ignore the question if not relevant. Even though we need root access to alter the value, will we be fine with negative value?. Regards Maddy > - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); > - if (1UL << val > PTRS_PER_PTE) > + if (val / PAGE_SIZE > PTRS_PER_PTE) > return -EINVAL; > - fault_around_order = val; > + fault_around_bytes = val; > return 0; > } > -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, > - fault_around_order_get, fault_around_order_set, "%llu\n"); > +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, > + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); > > static int __init fault_around_debugfs(void) > { > void *ret; > > - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, > - &fault_around_order_fops); > + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, > + &fault_around_bytes_fops); > if (!ret) > - pr_warn("Failed to create fault_around_order in debugfs"); > + pr_warn("Failed to create fault_around_bytes in debugfs"); > return 0; > } > late_initcall(fault_around_debugfs); > - > -static inline unsigned long fault_around_pages(void) > -{ > - return 1UL << fault_around_order; > -} > - > -static inline unsigned long fault_around_mask(void) > -{ > - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); > -} > -#else > -static inline unsigned long fault_around_pages(void) > -{ > - unsigned long nr_pages; > - > - nr_pages = 1UL << FAULT_AROUND_ORDER; > - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); > - return nr_pages; > -} > - > -static inline unsigned long fault_around_mask(void) > -{ > - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); > -} > #endif > > static void do_fault_around(struct vm_area_struct *vma, unsigned long address, > @@ -3515,7 +3499,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, > * if page by the offset is not ready to be mapped (cold cache or > * something). 
> */ > - if (vma->vm_ops->map_pages) { > + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { > pte = pte_offset_map_lock(mm, pmd, address, &ptl); > do_fault_around(vma, address, pte, pgoff, flags); > if (!pte_same(*pte, orig_pte)) > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com ([134.134.136.24]:37902 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752015AbaE0KWH (ORCPT ); Tue, 27 May 2014 06:22:07 -0400 From: "Kirill A. Shutemov" In-Reply-To: <53842FB1.7090909@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <53842FB1.7090909@linux.vnet.ibm.com> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Content-Transfer-Encoding: 7bit Message-ID: <20140527102200.012BBE009B@blue.fi.intel.com> Date: Tue, 27 May 2014 13:21:59 +0300 (EEST) Sender: linux-arch-owner@vger.kernel.org List-ID: To: Madhavan Srinivasan Cc: "Kirill A. Shutemov" , Rusty Russell , Andrew Morton , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Message-ID: <20140527102159.HHfsvkr6l90c8n2-fFVaeFww9Xw4hccVSpQ6C8TscaI@z> Madhavan Srinivasan wrote: > On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: > > Rusty Russell wrote: > >> "Kirill A. 
Shutemov" writes: > >>> Andrew Morton wrote: > >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >>>> > >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >>>>> the order of the fault-around size in bytes, and fault_around_pages() > >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) > >>>> > >>>> Yes. And shame on me for missing it (this time!) at review. > >>>> > >>>> There's still time to fix this. Patches, please. > >>> > >>> Here it is. Made at 3.30 AM, build tested only. > >> > >> Prefer on top of Maddy's patch which makes it always a variable, rather > >> than CONFIG_DEBUG_FS. It's got enough hair as it is. > > > > Something like this? > > > > From: "Kirill A. Shutemov" > > Date: Tue, 20 May 2014 13:02:03 +0300 > > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > > > There are evidences that faultaround feature is less relevant on > > architectures with page size bigger then 4k. Which makes sense since > > page fault overhead per byte of mapped area should be less there. > > > > Let's rework the feature to specify faultaround area in bytes instead of > > page order. It's 64 kilobytes for now. > > > > The patch effectively disables faultaround on architectures with > > page size >= 64k (like ppc64). > > > > It's possible that some other size of faultaround area is relevant for a > > platform. We can expose `fault_around_bytes' variable to arch-specific > > code once such platforms will be found. > > > > Signed-off-by: Kirill A. 
Shutemov > > --- > > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > > 1 file changed, 23 insertions(+), 39 deletions(-) > > > > diff --git a/mm/memory.c b/mm/memory.c > > index 037b812a9531..252b319e8cdf 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > > update_mmu_cache(vma, address, pte); > > } > > > > -#define FAULT_AROUND_ORDER 4 > > +static unsigned long fault_around_bytes = 65536; > > + > > +static inline unsigned long fault_around_pages(void) > > +{ > > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > > +} > > + > > +static inline unsigned long fault_around_mask(void) > > +{ > > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > > +} > > > > -#ifdef CONFIG_DEBUG_FS > > -static unsigned int fault_around_order = FAULT_AROUND_ORDER; > > > > -static int fault_around_order_get(void *data, u64 *val) > > +#ifdef CONFIG_DEBUG_FS > > +static int fault_around_bytes_get(void *data, u64 *val) > > { > > - *val = fault_around_order; > > + *val = fault_around_bytes; > > return 0; > > } > > > > -static int fault_around_order_set(void *data, u64 val) > > +static int fault_around_bytes_set(void *data, u64 val) > > { > > Kindly ignore the question if not relevant. Even though we need root > access to alter the value, will we be fine with > negative value?. val is u64. or I miss something? -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp04.in.ibm.com (e28smtp04.in.ibm.com [122.248.162.4]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id B3023140094 for ; Thu, 8 May 2014 19:28:30 +1000 (EST) Received: from /spool/local by e28smtp04.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Thu, 8 May 2014 14:58:24 +0530 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by d28dlp01.in.ibm.com (Postfix) with ESMTP id 83E1FE0045 for ; Thu, 8 May 2014 14:58:49 +0530 (IST) Received: from d28av02.in.ibm.com (d28av02.in.ibm.com [9.184.220.64]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s489SUd53670388 for ; Thu, 8 May 2014 14:58:30 +0530 Received: from d28av02.in.ibm.com (localhost [127.0.0.1]) by d28av02.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s489SIia027839 for ; Thu, 8 May 2014 14:58:21 +0530 From: Madhavan Srinivasan To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Subject: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Date: Thu, 8 May 2014 14:58:14 +0530 Message-Id: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Cc: riel@redhat.com, ak@linux.intel.com, peterz@infradead.org, rusty@rustcorp.com.au, dave.hansen@intel.com, Madhavan Srinivasan , paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Kirill A. Shutemov with 8c6e50b029 commit introduced vm_ops->map_pages() for mapping easy accessible pages around fault address in hope to reduce number of minor page faults. This patch creates infrastructure to modify the FAULT_AROUND_ORDER value using mm/Kconfig. This will enable architecture maintainers to decide on suitable FAULT_AROUND_ORDER value based on performance data for that architecture. First patch also defaults FAULT_AROUND_ORDER Kconfig element to 4. Second patch list out the performance numbers for powerpc (platform pseries) and initialize the fault around order variable for pseries platform of powerpc. 
V4 Changes: Replaced the BUILD_BUG_ON with VM_BUG_ON. Moved fault_around_pages() and fault_around_mask() functions outside of #ifdef CONFIG_DEBUG_FS. V3 Changes: Replaced the FAULT_AROUND_ORDER macro with a variable to support architectures that have sub-platforms. Made changes in commit messages. V2 Changes: Created Kconfig parameter for FAULT_AROUND_ORDER. Added check in do_read_fault to handle FAULT_AROUND_ORDER value of 0. Made changes in commit messages. Madhavan Srinivasan (2): mm: move FAULT_AROUND_ORDER to arch/ powerpc/pseries: init fault_around_order for pseries arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 5 +++++ mm/Kconfig | 8 ++++++++ mm/memory.c | 25 ++++++------------------- 4 files changed, 21 insertions(+), 19 deletions(-) -- 1.7.10.4 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp08.in.ibm.com (e28smtp08.in.ibm.com [122.248.162.8]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 5C88E140094 for ; Thu, 8 May 2014 19:28:33 +1000 (EST) Received: from /spool/local by e28smtp08.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only!
Violators will be prosecuted for from ; Thu, 8 May 2014 14:58:27 +0530 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by d28dlp03.in.ibm.com (Postfix) with ESMTP id 00F471258048 for ; Thu, 8 May 2014 14:57:22 +0530 (IST) Received: from d28av02.in.ibm.com (d28av02.in.ibm.com [9.184.220.64]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s489SWp65636442 for ; Thu, 8 May 2014 14:58:32 +0530 Received: from d28av02.in.ibm.com (localhost [127.0.0.1]) by d28av02.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s489SKfK028017 for ; Thu, 8 May 2014 14:58:23 +0530 From: Madhavan Srinivasan To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Subject: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries Date: Thu, 8 May 2014 14:58:16 +0530 Message-Id: <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Cc: riel@redhat.com, ak@linux.intel.com, peterz@infradead.org, rusty@rustcorp.com.au, dave.hansen@intel.com, Madhavan Srinivasan , paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Performance data for different FAULT_AROUND_ORDER values from 4 socket Power7 system (128 Threads and 128GB memory). perf stat with repeat of 5 is used to get the stddev values. Test ran in v3.14 kernel (Baseline) and v3.15-rc1 for different fault around order values. %change here is calculated in this method ((new value - baseline)/baseline). And negative %change says its a drop in time. 
FAULT_AROUND_ORDER Baseline 1 3 4 5 8 Linux build (make -j64) minor-faults 47,437,359 35,279,286 25,425,347 23,461,275 22,002,189 21,435,836 times in seconds 347.302528420 344.061588460 340.974022391 348.193508116 348.673900158 350.986543618 stddev for time ( +- 1.50% ) ( +- 0.73% ) ( +- 1.13% ) ( +- 1.01% ) ( +- 1.89% ) ( +- 1.55% ) %chg time to baseline -0.9% -1.8% 0.2% 0.39% 1.06% Linux rebuild (make -j64) minor-faults 941,552 718,319 486,625 440,124 410,510 397,416 times in seconds 30.569834718 31.219637539 31.319370649 31.434285472 31.972367174 31.443043580 stddev for time ( +- 1.07% ) ( +- 0.13% ) ( +- 0.43% ) ( +- 0.18% ) ( +- 0.95% ) ( +- 0.58% ) %chg time to baseline 2.1% 2.4% 2.8% 4.58% 2.85% Binutils build (make all -j64 ) minor-faults 474,821 371,380 269,463 247,715 235,255 228,337 times in seconds 53.882492432 53.584289348 53.882773216 53.755816431 53.607824348 53.423759642 stddev for time ( +- 0.08% ) ( +- 0.56% ) ( +- 0.17% ) ( +- 0.11% ) ( +- 0.60% ) ( +- 0.69% ) %chg time to baseline -0.55% 0.0% -0.23% -0.51% -0.85% Two synthetic tests: access every word in file in sequential/random order. 
Sequential access 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 263,148 131,166 32,908 16,514 8,260 1,093 times in seconds 53.091138345 53.113191672 53.188776177 53.233017218 53.206841347 53.429979442 stddev for time ( +- 0.06% ) ( +- 0.07% ) ( +- 0.08% ) ( +- 0.09% ) ( +- 0.03% ) ( +- 0.03% ) %chg time to baseline 0.04% 0.18% 0.26% 0.21% 0.63% 8 threads minor-faults 2,097,267 1,048,753 262,237 131,397 65,621 8,274 times in seconds 55.173790028 54.591880790 54.824623287 54.802162211 54.969680503 54.790387715 stddev for time ( +- 0.78% ) ( +- 0.09% ) ( +- 0.08% ) ( +- 0.07% ) ( +- 0.28% ) ( +- 0.05% ) %chg time to baseline -1.05% -0.63% -0.67% -0.36% -0.69% 32 threads minor-faults 8,388,751 4,195,621 1,049,664 525,461 262,535 32,924 times in seconds 60.431573046 60.669110744 60.485336388 60.697789706 60.077959564 60.588855032 stddev for time ( +- 0.44% ) ( +- 0.27% ) ( +- 0.46% ) ( +- 0.67% ) ( +- 0.31% ) ( +- 0.49% ) %chg time to baseline 0.39% 0.08% 0.44% -0.58% 0.25% 64 threads minor-faults 16,777,409 8,607,527 2,289,766 1,202,264 598,405 67,587 times in seconds 96.932617720 100.675418760 102.109880836 103.881733383 102.580199555 105.751194041 stddev for time ( +- 1.39% ) ( +- 1.06% ) ( +- 0.99% ) ( +- 0.76% ) ( +- 1.65% ) ( +- 1.60% ) %chg time to baseline 3.86% 5.34% 7.16% 5.82% 9.09% 128 threads minor-faults 33,554,705 17,375,375 4,682,462 2,337,245 1,179,007 134,819 times in seconds 128.766704495 115.659225437 120.353046307 115.291871270 115.450886036 113.991902150 stddev for time ( +- 2.93% ) ( +- 0.30% ) ( +- 2.93% ) ( +- 1.24% ) ( +- 1.03% ) ( +- 0.70% ) %chg time to baseline -10.17% -6.53% -10.46% -10.34% -11.47% Random access 1GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 17,155 8,678 2,126 1,097 581 134 times in seconds 51.904430523 51.658017987 51.919270792 51.560531738 52.354431597 51.976469502 stddev for time ( +- 3.19% ) ( +- 1.35% ) ( +- 1.56% ) ( +- 0.91% ) ( +- 1.70% ) ( +- 2.02% ) %chg time to 
baseline -0.47% 0.02% -0.66% 0.86% 0.13% 8 threads minor-faults 131,844 70,705 17,457 8,505 4,251 598 times in seconds 58.162813956 54.991706305 54.952675791 55.323057492 54.755587379 53.376722828 stddev for time ( +- 1.44% ) ( +- 0.69% ) ( +- 1.23% ) ( +- 2.78% ) ( +- 1.90% ) ( +- 2.91% ) %chg time to baseline -5.45% -5.52% -4.88% -5.86% -8.22% 32 threads minor-faults 524,437 270,760 67,069 33,414 16,641 2,204 times in seconds 69.981777072 76.539570015 79.753578505 76.245943618 77.254258344 79.072596831 stddev for time ( +- 2.81% ) ( +- 1.95% ) ( +- 2.66% ) ( +- 0.99% ) ( +- 2.35% ) ( +- 3.22% ) %chg time to baseline 9.37% 13.96% 8.95% 10.39% 12.98% 64 threads minor-faults 1,049,117 527,451 134,016 66,638 33,391 4,559 times in seconds 108.024517536 117.575067996 115.322659914 111.943998437 115.049450815 119.218450840 stddev for time ( +- 2.40% ) ( +- 1.77% ) ( +- 1.19% ) ( +- 3.29% ) ( +- 2.32% ) ( +- 1.42% ) %chg time to baseline 8.84% 6.75% 3.62% 6.5% 10.3% 128 threads minor-faults 2,097,440 1,054,360 267,042 133,328 66,532 8,652 times in seconds 155.055861167 153.059625968 152.449492156 151.024005282 150.844647770 155.954366718 stddev for time ( +- 1.32% ) ( +- 1.14% ) ( +- 1.32% ) ( +- 0.81% ) ( +- 0.75% ) ( +- 0.72% ) %chg time to baseline -1.28% -1.68% -2.59% -2.71% 0.57% In case of kernel build, fault around order (fao) value of 1 and 3 wins when compared to 4 (but a bit noisy). In case of kernel rebuild, slowdown for fao > 0 is seen. In case of synthetic test, there are sporadic gains, but mostly slowdown. No clear sweet spot fao value that can be suggested for the ppc64/pseries with the current performance data. Hence, this patch suggests a value of zero for the fao. Worst case scenario: we touch one page every 16M to demonstrate overhead. 
Touch only one page in page table in 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 8 1 thread minor-faults 1,104 1,090 1,071 1,068 1,065 1,063 times in seconds 0.006583298 0.008531502 0.019733795 0.036033763 0.062300553 0.406857086 stddev for time ( +- 2.79% ) ( +- 2.42% ) ( +- 3.47% ) ( +- 2.81% ) ( +- 2.01% ) ( +- 1.33% ) 8 threads minor-faults 8,279 8,264 8,245 8,243 8,239 8,240 times in seconds 0.044572398 0.057211811 0.107606306 0.205626815 0.381679120 2.647979955 stddev for time ( +- 1.95% ) ( +- 2.98% ) ( +- 1.74% ) ( +- 2.80% ) ( +- 2.01% ) ( +- 1.86% ) 32 threads minor-faults 32,879 32,864 32,849 32,845 32,839 32,843 times in seconds 0.197659343 0.218486087 0.445116407 0.694235883 1.296894038 9.127517045 stddev for time ( +- 3.05% ) ( +- 3.05% ) ( +- 4.33% ) ( +- 3.08% ) ( +- 3.75% ) ( +- 0.56% ) 64 threads minor-faults 65,680 65,664 65,646 65,645 65,640 65,647 times in seconds 0.455537304 0.489688780 0.866490093 1.427393118 2.379628982 17.059295051 stddev for time ( +- 4.01% ) ( +- 4.13% ) ( +- 2.92% ) ( +- 1.68% ) ( +- 1.79% ) ( +- 0.48% ) 128 threads minor-faults 131,279 131,265 131,250 131,245 131,241 131,254 times in seconds 1.026880651 1.095327536 1.721728274 2.808233068 4.662729948 31.732848290 stddev for time ( +- 6.85% ) ( +- 4.09% ) ( +- 1.71% ) ( +- 3.45% ) ( +- 2.40% ) ( +- 0.68% ) Signed-off-by: Madhavan Srinivasan --- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 9921953..6e6c993 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -17,6 +17,8 @@ struct device_node; extern void request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name); +extern unsigned int fault_around_order; + #include extern void __init fw_hypertas_feature_init(const char *hypertas, diff --git 
a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 2db8cc6..4391c3c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -465,6 +465,11 @@ static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* + * Defaulting to zero since no sweet spot value found in the performance test. + */ + fault_around_order = 0; + /* Discover PIC type and setup ppc_md accordingly */ pseries_discover_pic(); -- 1.7.10.4 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp07.in.ibm.com (e28smtp07.in.ibm.com [122.248.162.7]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id BFBD014017A for ; Thu, 8 May 2014 19:28:52 +1000 (EST) Received: from /spool/local by e28smtp07.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Thu, 8 May 2014 14:58:47 +0530 Received: from d28relay01.in.ibm.com (d28relay01.in.ibm.com [9.184.220.58]) by d28dlp01.in.ibm.com (Postfix) with ESMTP id 01711E0057 for ; Thu, 8 May 2014 14:59:11 +0530 (IST) Received: from d28av02.in.ibm.com (d28av02.in.ibm.com [9.184.220.64]) by d28relay01.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s489SUe917039502 for ; Thu, 8 May 2014 14:58:31 +0530 Received: from d28av02.in.ibm.com (localhost [127.0.0.1]) by d28av02.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s489SJ3s027904 for ; Thu, 8 May 2014 14:58:22 +0530 From: Madhavan Srinivasan To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Subject: [PATCH V4 1/2] mm: move FAULT_AROUND_ORDER to arch/ Date: Thu, 8 May 2014 14:58:15 +0530 Message-Id: <1399541296-18810-2-git-send-email-maddy@linux.vnet.ibm.com> In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> References: 
<1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Cc: riel@redhat.com, ak@linux.intel.com, peterz@infradead.org, rusty@rustcorp.com.au, dave.hansen@intel.com, Madhavan Srinivasan , paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Kirill A. Shutemov with 8c6e50b029 commit introduced vm_ops->map_pages() for mapping easy accessible pages around fault address in hope to reduce number of minor page faults. This patch creates infrastructure to modify the FAULT_AROUND_ORDER value using mm/Kconfig. This will enable architecture maintainers to decide on suitable FAULT_AROUND_ORDER value based on performance data for that architecture. Patch also defaults FAULT_AROUND_ORDER Kconfig element to 4. Signed-off-by: Madhavan Srinivasan --- mm/Kconfig | 8 ++++++++ mm/memory.c | 25 ++++++------------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index ebe5880..c7fc4f1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -176,6 +176,14 @@ config MOVABLE_NODE config HAVE_BOOTMEM_INFO_NODE def_bool n +# +# Fault around order is a control knob to decide the fault around pages. +# Default value is set to 4 , but the arch can override it as desired. 
+# +config FAULT_AROUND_ORDER + int + default 4 + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" diff --git a/mm/memory.c b/mm/memory.c index 037b812..e3931ef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,11 +3402,9 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +unsigned int fault_around_order __read_mostly = CONFIG_FAULT_AROUND_ORDER; #ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; - static int fault_around_order_get(void *data, u64 *val) { *val = fault_around_order; @@ -3415,7 +3413,6 @@ static int fault_around_order_get(void *data, u64 *val) static int fault_around_order_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); if (1UL << val > PTRS_PER_PTE) return -EINVAL; fault_around_order = val; @@ -3435,31 +3432,21 @@ static int __init fault_around_debugfs(void) return 0; } late_initcall(fault_around_debugfs); +#endif static inline unsigned long fault_around_pages(void) { - return 1UL << fault_around_order; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); -} -#else -static inline unsigned long fault_around_pages(void) -{ unsigned long nr_pages; - nr_pages = 1UL << FAULT_AROUND_ORDER; - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); + nr_pages = 1UL << fault_around_order; + VM_BUG_ON(nr_pages > PTRS_PER_PTE); return nr_pages; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); + return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); } -#endif static void do_fault_around(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pgoff_t pgoff, unsigned int flags) @@ -3515,7 +3502,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to 
be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if ((vma->vm_ops->map_pages) && fault_around_order) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- 1.7.10.4 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp04.in.ibm.com (e28smtp04.in.ibm.com [122.248.162.4]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id F19D114007D for ; Thu, 15 May 2014 18:25:20 +1000 (EST) Received: from /spool/local by e28smtp04.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Thu, 15 May 2014 13:55:17 +0530 Received: from d28relay04.in.ibm.com (d28relay04.in.ibm.com [9.184.220.61]) by d28dlp01.in.ibm.com (Postfix) with ESMTP id 8F4AEE0057 for ; Thu, 15 May 2014 13:55:50 +0530 (IST) Received: from d28av03.in.ibm.com (d28av03.in.ibm.com [9.184.220.65]) by d28relay04.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4F8PNB655902306 for ; Thu, 15 May 2014 13:55:23 +0530 Received: from d28av03.in.ibm.com (localhost [127.0.0.1]) by d28av03.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4F8PC27001766 for ; Thu, 15 May 2014 13:55:14 +0530 Message-ID: <537479E7.90806@linux.vnet.ibm.com> Date: Thu, 15 May 2014 13:55:11 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 To: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> In-Reply-To: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> Content-Type: text/plain; charset=ISO-8859-1 Cc: riel@redhat.com, ak@linux.intel.com, peterz@infradead.org, rusty@rustcorp.com.au, dave.hansen@intel.com, paulus@samba.org, mgorman@suse.de, 
akpm@linux-foundation.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Hi Ingo, Do you have any comments for the latest version of the patchset. If not, kindly can you pick it up as is. With regards Maddy > Kirill A. Shutemov with 8c6e50b029 commit introduced > vm_ops->map_pages() for mapping easy accessible pages around > fault address in hope to reduce number of minor page faults. > > This patch creates infrastructure to modify the FAULT_AROUND_ORDER > value using mm/Kconfig. This will enable architecture maintainers > to decide on suitable FAULT_AROUND_ORDER value based on > performance data for that architecture. First patch also defaults > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > out the performance numbers for powerpc (platform pseries) and > initialize the fault around order variable for pseries platform of > powerpc. > > V4 Changes: > Replaced the BUILD_BUG_ON with VM_BUG_ON. > Moved fault_around_pages() and fault_around_mask() functions outside of > #ifdef CONFIG_DEBUG_FS. > > V3 Changes: > Replaced FAULT_AROUND_ORDER macro to a variable to support arch's that > supports sub platforms. > Made changes in commit messages. > > V2 Changes: > Created Kconfig parameter for FAULT_AROUND_ORDER > Added check in do_read_fault to handle FAULT_AROUND_ORDER value of 0 > Made changes in commit messages. 
> > Madhavan Srinivasan (2): > mm: move FAULT_AROUND_ORDER to arch/ > powerpc/pseries: init fault_around_order for pseries > > arch/powerpc/platforms/pseries/pseries.h | 2 ++ > arch/powerpc/platforms/pseries/setup.c | 5 +++++ > mm/Kconfig | 8 ++++++++ > mm/memory.c | 25 ++++++------------------- > 4 files changed, 21 insertions(+), 19 deletions(-) > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pb0-x231.google.com (mail-pb0-x231.google.com [IPv6:2607:f8b0:400e:c01::231]) (using TLSv1 with cipher ECDHE-RSA-RC4-SHA (128/128 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 361311400DD for ; Fri, 16 May 2014 03:29:38 +1000 (EST) Received: by mail-pb0-f49.google.com with SMTP id jt11so1364488pbb.22 for ; Thu, 15 May 2014 10:29:34 -0700 (PDT) Date: Thu, 15 May 2014 10:28:17 -0700 (PDT) From: Hugh Dickins To: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <537479E7.90806@linux.vnet.ibm.com> Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Cc: linux-arch@vger.kernel.org, riel@redhat.com, rusty@rustcorp.com.au, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Thu, 15 May 2014, Madhavan Srinivasan wrote: > > Hi Ingo, > > Do you have any comments for the latest version of the patchset. If > not, kindly can you pick it up as is. > > > With regards > Maddy > > > Kirill A. 
Shutemov with 8c6e50b029 commit introduced > > vm_ops->map_pages() for mapping easy accessible pages around > > fault address in hope to reduce number of minor page faults. > > > > This patch creates infrastructure to modify the FAULT_AROUND_ORDER > > value using mm/Kconfig. This will enable architecture maintainers > > to decide on suitable FAULT_AROUND_ORDER value based on > > performance data for that architecture. First patch also defaults > > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > > out the performance numbers for powerpc (platform pseries) and > > initialize the fault around order variable for pseries platform of > > powerpc. Sorry for not commenting earlier - just reminded by this ping to Ingo. I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. arch/powerpc/Kconfig suggests that Power supports base page size of 4k, 16k, 64k or 256k. I would expect your optimal fault_around_order to depend very much on the base page size. Perhaps fault_around_size would provide a more useful default? 
Hugh From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Rusty Russell To: Hugh Dickins , Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> Date: Mon, 19 May 2014 09:42:46 +0930 Message-ID: <87wqdik4n5.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Cc: linux-arch@vger.kernel.org, riel@redhat.com, ak@linux.intel.com, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Hugh Dickins writes: > On Thu, 15 May 2014, Madhavan Srinivasan wrote: >> >> Hi Ingo, >> >> Do you have any comments for the latest version of the patchset. If >> not, kindly can you pick it up as is. >> >> >> With regards >> Maddy >> >> > Kirill A. Shutemov with 8c6e50b029 commit introduced >> > vm_ops->map_pages() for mapping easy accessible pages around >> > fault address in hope to reduce number of minor page faults. >> > >> > This patch creates infrastructure to modify the FAULT_AROUND_ORDER >> > value using mm/Kconfig. This will enable architecture maintainers >> > to decide on suitable FAULT_AROUND_ORDER value based on >> > performance data for that architecture. First patch also defaults >> > FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >> > out the performance numbers for powerpc (platform pseries) and >> > initialize the fault around order variable for pseries platform of >> > powerpc. > > Sorry for not commenting earlier - just reminded by this ping to Ingo. > > I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. 
> > arch/powerpc/Kconfig suggests that Power supports base page size of > 4k, 16k, 64k or 256k. > > I would expect your optimal fault_around_order to depend very much on > the base page size. It was 64k, which is what PPC64 uses on all the major distributions. You really only get a choice of 4k and 64k with 64 bit power. > Perhaps fault_around_size would provide a more useful default? That seems to fit. With 4k pages and order 4, you're asking for 64k. Maddy's result shows 64k is also reasonable for 64k pages. Perhaps we try to generalize from two data points (a slight improvement over doing it from 1!), eg: /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ unsigned int fault_around_order __read_mostly = (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); Cheers, Rusty. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e23smtp05.au.ibm.com (e23smtp05.au.ibm.com [202.81.31.147]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id B368D1A00A5 for ; Mon, 19 May 2014 13:06:09 +1000 (EST) Received: from /spool/local by e23smtp05.au.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Mon, 19 May 2014 13:06:05 +1000 Received: from d23relay05.au.ibm.com (d23relay05.au.ibm.com [9.190.235.152]) by d23dlp01.au.ibm.com (Postfix) with ESMTP id 0A9192CE8055 for ; Mon, 19 May 2014 13:06:00 +1000 (EST) Received: from d23av03.au.ibm.com (d23av03.au.ibm.com [9.190.234.97]) by d23relay05.au.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4J2iQSb6226366 for ; Mon, 19 May 2014 12:44:27 +1000 Received: from d23av03.au.ibm.com (localhost [127.0.0.1]) by d23av03.au.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4J35wkq018244 for ; Mon, 19 May 2014 13:05:59 +1000 Message-ID: <53797511.1050409@linux.vnet.ibm.com> Date: Mon, 19 May 2014 08:35:53 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 To: Rusty Russell , Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> In-Reply-To: <87wqdik4n5.fsf@rustcorp.com.au> Content-Type: text/plain; charset=ISO-8859-1 Cc: linux-arch@vger.kernel.org, riel@redhat.com, ak@linux.intel.com, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > Hugh Dickins writes: >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >>> >>> Hi Ingo, >>> >>> Do you have any comments for the latest version of the patchset. If >>> not, kindly can you pick it up as is. >>> >>> >>> With regards >>> Maddy >>> >>>> Kirill A. 
Shutemov with 8c6e50b029 commit introduced >>>> vm_ops->map_pages() for mapping easy accessible pages around >>>> fault address in hope to reduce number of minor page faults. >>>> >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >>>> value using mm/Kconfig. This will enable architecture maintainers >>>> to decide on suitable FAULT_AROUND_ORDER value based on >>>> performance data for that architecture. First patch also defaults >>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >>>> out the performance numbers for powerpc (platform pseries) and >>>> initialize the fault around order variable for pseries platform of >>>> powerpc. >> >> Sorry for not commenting earlier - just reminded by this ping to Ingo. >> >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >> >> arch/powerpc/Kconfig suggests that Power supports base page size of >> 4k, 16k, 64k or 256k. >> >> I would expect your optimal fault_around_order to depend very much on >> the base page size. > > It was 64k, which is what PPC64 uses on all the major distributions. > You really only get a choice of 4k and 64k with 64 bit power. > This is true. PPC64 support multiple pagesize and yes the default page size of 64k, is taken as base pagesize for the tests. >> Perhaps fault_around_size would provide a more useful default? > > That seems to fit. With 4k pages and order 4, you're asking for 64k. > Maddy's result shows 64k is also reasonable for 64k pages. > > Perhaps we try to generalize from two data points (a slight improvement > over doing it from 1!), eg: > > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > unsigned int fault_around_order __read_mostly = > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > This may be right. But these are the concerns, will not this make other arch to pick default without any tuning and also this will remove the compile time option to disable the feature? 
Thanks for review With regards Maddy > Cheers, > Rusty. > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-x232.google.com (mail-pa0-x232.google.com [IPv6:2607:f8b0:400e:c03::232]) (using TLSv1 with cipher ECDHE-RSA-RC4-SHA (128/128 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 206D41A007E for ; Tue, 20 May 2014 09:24:26 +1000 (EST) Received: by mail-pa0-f50.google.com with SMTP id fb1so6433496pad.23 for ; Mon, 19 May 2014 16:24:23 -0700 (PDT) Date: Mon, 19 May 2014 16:23:07 -0700 (PDT) From: Hugh Dickins To: Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <53797511.1050409@linux.vnet.ibm.com> Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Cc: linux-arch@vger.kernel.org, riel@redhat.com, x86@kernel.org, dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Mon, 19 May 2014, Madhavan Srinivasan wrote: > On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > > Hugh Dickins writes: > >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: > >>> > >>> Hi Ingo, > >>> > >>> Do you have any comments for the latest version of the patchset. If > >>> not, kindly can you pick it up as is. > >>> > >>> > >>> With regards > >>> Maddy > >>> > >>>> Kirill A. 
Shutemov with 8c6e50b029 commit introduced > >>>> vm_ops->map_pages() for mapping easy accessible pages around > >>>> fault address in hope to reduce number of minor page faults. > >>>> > >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER > >>>> value using mm/Kconfig. This will enable architecture maintainers > >>>> to decide on suitable FAULT_AROUND_ORDER value based on > >>>> performance data for that architecture. First patch also defaults > >>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list > >>>> out the performance numbers for powerpc (platform pseries) and > >>>> initialize the fault around order variable for pseries platform of > >>>> powerpc. > >> > >> Sorry for not commenting earlier - just reminded by this ping to Ingo. > >> > >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. > >> > >> arch/powerpc/Kconfig suggests that Power supports base page size of > >> 4k, 16k, 64k or 256k. > >> > >> I would expect your optimal fault_around_order to depend very much on > >> the base page size. > > > > It was 64k, which is what PPC64 uses on all the major distributions. > > You really only get a choice of 4k and 64k with 64 bit power. > > > This is true. PPC64 support multiple pagesize and yes the default page > size of 64k, is taken as base pagesize for the tests. > > >> Perhaps fault_around_size would provide a more useful default? > > > > That seems to fit. With 4k pages and order 4, you're asking for 64k. > > Maddy's result shows 64k is also reasonable for 64k pages. > > > > Perhaps we try to generalize from two data points (a slight improvement > > over doing it from 1!), eg: > > > > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > > unsigned int fault_around_order __read_mostly = > > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); Rusty's bimodal answer doesn't seem the right starting point to me. 
Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be the order of the fault-around size in bytes, and fault_around_pages() use 1UL << (fault_around_order - PAGE_SHIFT) - when that doesn't wrap, of course! That would at least have a better chance of being appropriate for architectures with 8k and 16k pages (Itanium springs to mind). Not necessarily right for them, since each architecture may have different faulting overheads; but a better chance of being right than blindly assuming 4k or 64k pages for everyone. I'd be glad to see that change go into v3.15: what do you think, Kirill, are we too late to make such a change now? Or do you see some objection to it? > This may be right. But these are the concerns, will not this make other > arch to pick default without any tuning Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? Did other architectures, with other page sizes, back that default? Clearly not powerpc. > and also this will remove the > compile time option to disable the feature? Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig for v3.16? I'm not sure whether Rusty was arguing against that or not. I think we are all three concerned to have a more sensible default than what's there at present. I don't feel very strongly about your Kconfig option: I've no objection, if it were to default to byte order 16. 
Hugh From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) by lists.ozlabs.org (Postfix) with ESMTP id 597D51A0815 for ; Tue, 20 May 2014 09:43:04 +1000 (EST) Date: Mon, 19 May 2014 16:43:01 -0700 From: Andrew Morton To: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Cc: linux-arch@vger.kernel.org, x86@kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) Yes. And shame on me for missing it (this time!) at review. There's still time to fix this. Patches, please. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by lists.ozlabs.org (Postfix) with ESMTP id 7638F1A0824 for ; Tue, 20 May 2014 10:44:38 +1000 (EST) From: "Kirill A. 
Shutemov" To: Andrew Morton In-Reply-To: <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140520004429.E660AE009B@blue.fi.intel.com> Date: Tue, 20 May 2014 03:44:29 +0300 (EEST) Cc: linux-arch@vger.kernel.org, x86@kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Andrew Morton wrote: > On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > the order of the fault-around size in bytes, and fault_around_pages() > > use 1UL << (fault_around_order - PAGE_SHIFT) > > Yes. And shame on me for missing it (this time!) at review. > > There's still time to fix this. Patches, please. Here it is. Made at 3.30 AM, build tested only. I'll sign it off tomorrow after testing. 
diff --git a/mm/memory.c b/mm/memory.c index 037b812a9531..9d6941c9a9e4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,62 +3402,62 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +#define FAULT_AROUND_BYTES 65536 #ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; +static unsigned int fault_around_bytes = FAULT_AROUND_BYTES; -static int fault_around_order_get(void *data, u64 *val) +static int fault_around_bytes_get(void *data, u64 *val) { - *val = fault_around_order; + *val = fault_around_bytes; return 0; } -static int fault_around_order_set(void *data, u64 val) +static int fault_around_bytes_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); - if (1UL << val > PTRS_PER_PTE) + BUILD_BUG_ON(FAULT_AROUND_BYTES / PAGE_SIZE > PTRS_PER_PTE); + if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; - fault_around_order = val; + fault_around_bytes = val; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, - fault_around_order_get, fault_around_order_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, - &fault_around_order_fops); + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, + &fault_around_bytes_fops); if (!ret) - pr_warn("Failed to create fault_around_order in debugfs"); + pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } late_initcall(fault_around_debugfs); static inline unsigned long fault_around_pages(void) { - return 1UL << fault_around_order; + return fault_around_bytes / PAGE_SIZE; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); + return ~(round_down(fault_around_bytes, PAGE_SIZE) - 1); } 
#else static inline unsigned long fault_around_pages(void) { unsigned long nr_pages; - nr_pages = 1UL << FAULT_AROUND_ORDER; + nr_pages = FAULT_AROUND_BYTES / PAGE_SIZE; BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); return nr_pages; } static inline unsigned long fault_around_mask(void) { - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); + return ~(round_down(FAULT_AROUND_BYTES, PAGE_SIZE) - 1); } #endif @@ -3515,7 +3515,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e23smtp06.au.ibm.com (e23smtp06.au.ibm.com [202.81.31.148]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 246A01A0846 for ; Tue, 20 May 2014 12:06:59 +1000 (EST) Received: from /spool/local by e23smtp06.au.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Tue, 20 May 2014 12:06:59 +1000 Received: from d23relay05.au.ibm.com (d23relay05.au.ibm.com [9.190.235.152]) by d23dlp02.au.ibm.com (Postfix) with ESMTP id 36C7A2BB0055 for ; Tue, 20 May 2014 12:06:57 +1000 (EST) Received: from d23av03.au.ibm.com (d23av03.au.ibm.com [9.190.234.97]) by d23relay05.au.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4K1jM5K35848366 for ; Tue, 20 May 2014 11:45:23 +1000 Received: from d23av03.au.ibm.com (localhost [127.0.0.1]) by d23av03.au.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4K26t4V018775 for ; Tue, 20 May 2014 12:06:56 +1000 Message-ID: <537AB8B2.3040000@linux.vnet.ibm.com> Date: Tue, 20 May 2014 07:36:42 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 To: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> In-Reply-To: Content-Type: text/plain; charset=ISO-8859-1 Cc: linux-arch@vger.kernel.org, riel@redhat.com, x86@kernel.org, dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Tuesday 20 May 2014 04:53 AM, Hugh Dickins wrote: > On Mon, 19 May 2014, Madhavan Srinivasan wrote: >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: >>> Hugh Dickins writes: >>>> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >>>>> >>>>> Hi Ingo, >>>>> >>>>> Do you have any comments for the latest version of the patchset. If >>>>> not, kindly can you pick it up as is. >>>>> >>>>> >>>>> With regards >>>>> Maddy >>>>> >>>>>> Kirill A. 
Shutemov with 8c6e50b029 commit introduced >>>>>> vm_ops->map_pages() for mapping easy accessible pages around >>>>>> fault address in hope to reduce number of minor page faults. >>>>>> >>>>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >>>>>> value using mm/Kconfig. This will enable architecture maintainers >>>>>> to decide on suitable FAULT_AROUND_ORDER value based on >>>>>> performance data for that architecture. First patch also defaults >>>>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >>>>>> out the performance numbers for powerpc (platform pseries) and >>>>>> initialize the fault around order variable for pseries platform of >>>>>> powerpc. >>>> >>>> Sorry for not commenting earlier - just reminded by this ping to Ingo. >>>> >>>> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >>>> >>>> arch/powerpc/Kconfig suggests that Power supports base page size of >>>> 4k, 16k, 64k or 256k. >>>> >>>> I would expect your optimal fault_around_order to depend very much on >>>> the base page size. >>> >>> It was 64k, which is what PPC64 uses on all the major distributions. >>> You really only get a choice of 4k and 64k with 64 bit power. >>> >> This is true. PPC64 support multiple pagesize and yes the default page >> size of 64k, is taken as base pagesize for the tests. >> >>>> Perhaps fault_around_size would provide a more useful default? >>> >>> That seems to fit. With 4k pages and order 4, you're asking for 64k. >>> Maddy's result shows 64k is also reasonable for 64k pages. >>> >>> Perhaps we try to generalize from two data points (a slight improvement >>> over doing it from 1!), eg: >>> >>> /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ >>> unsigned int fault_around_order __read_mostly = >>> (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > Rusty's bimodal answer doesn't seem the right starting point to me. 
> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) > - when that doesn't wrap, of course! > > That would at least have a better chance of being appropriate for > architectures with 8k and 16k pages (Itanium springs to mind). > > Not necessarily right for them, since each architecture may have > different faulting overheads; but a better chance of being right > than blindly assuming 4k or 64k pages for everyone. > > I'd be glad to see that change go into v3.15: what do you think, > Kirill, are we too late to make such a change now? > Or do you see some objection to it? > >> This may be right. But these are the concerns, will not this make other >> arch to pick default without any tuning > > Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? > Did other architectures, with other page sizes, back that default? > Clearly not powerpc. Ok. > >> and also this will remove the >> compile time option to disable the feature? > > Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig > for v3.16? > > I'm not sure whether Rusty was arguing against that or not I think > we are all three concerned to have a more sensible default than what's > there at present. I don't feel very strongly about your Kconfig Added it as one way to reset or disable the default value. But then I guess we decided on having FAULT_AROUND_ORDER as a variable which is more important than Kconfig option. > option: I've no objection, if it were to default to byte order 16. 
> Thanks for review With regards Maddy > Hugh > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ozlabs.org (ozlabs.org [103.22.144.67]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id EB6931A0843 for ; Tue, 20 May 2014 12:10:04 +1000 (EST) From: Rusty Russell To: Hugh Dickins , Madhavan Srinivasan Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Date: Tue, 20 May 2014 10:44:06 +0930 Message-ID: <87d2f9jlpd.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Cc: linux-arch@vger.kernel.org, riel@redhat.com, ak@linux.intel.com, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Hugh Dickins writes: > On Mon, 19 May 2014, Madhavan Srinivasan wrote: >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: >> > Hugh Dickins writes: >> >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >> >>> >> >>> Hi Ingo, >> >>> >> >>> Do you have any comments for the latest version of the patchset. If >> >>> not, kindly can you pick it up as is. >> >>> >> >>> >> >>> With regards >> >>> Maddy >> >>> >> >>>> Kirill A. Shutemov with 8c6e50b029 commit introduced >> >>>> vm_ops->map_pages() for mapping easy accessible pages around >> >>>> fault address in hope to reduce number of minor page faults. >> >>>> >> >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >> >>>> value using mm/Kconfig. 
This will enable architecture maintainers >> >>>> to decide on suitable FAULT_AROUND_ORDER value based on >> >>>> performance data for that architecture. First patch also defaults >> >>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >> >>>> out the performance numbers for powerpc (platform pseries) and >> >>>> initialize the fault around order variable for pseries platform of >> >>>> powerpc. >> >> >> >> Sorry for not commenting earlier - just reminded by this ping to Ingo. >> >> >> >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >> >> >> >> arch/powerpc/Kconfig suggests that Power supports base page size of >> >> 4k, 16k, 64k or 256k. >> >> >> >> I would expect your optimal fault_around_order to depend very much on >> >> the base page size. >> > >> > It was 64k, which is what PPC64 uses on all the major distributions. >> > You really only get a choice of 4k and 64k with 64 bit power. >> > >> This is true. PPC64 support multiple pagesize and yes the default page >> size of 64k, is taken as base pagesize for the tests. >> >> >> Perhaps fault_around_size would provide a more useful default? >> > >> > That seems to fit. With 4k pages and order 4, you're asking for 64k. >> > Maddy's result shows 64k is also reasonable for 64k pages. >> > >> > Perhaps we try to generalize from two data points (a slight improvement >> > over doing it from 1!), eg: >> > >> > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ >> > unsigned int fault_around_order __read_mostly = >> > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > Rusty's bimodal answer doesn't seem the right starting point to me. ? It's not bimodal, it's graded. I think you misread? > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) > - when that doesn't wrap, of course! 
> > That would at least have a better chance of being appropriate for > architectures with 8k and 16k pages (Itanium springs to mind). Well, from our two data points it seems that we want to fault in 64k at a time whatever our page size. Perhaps it's clearer if the code expresses itself that way. > Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? > Did other architectures, with other page sizes, back that default? > Clearly not powerpc. Yeah, BenH flagged it as "we should test this" for powerpc, which is what Maddy then did. >> and also this will remove the >> compile time option to disable the feature? > > Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig > for v3.16? > > I'm not sure whether Rusty was arguing against that or not. I think > we are all three concerned to have a more sensible default than what's > there at present. I don't feel very strongly about your Kconfig > option: I've no objection, if it were to default to byte order 16. I don't mind either. Cheers, Rusty. 
From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pb0-x229.google.com (mail-pb0-x229.google.com [IPv6:2607:f8b0:400e:c01::229]) (using TLSv1 with cipher ECDHE-RSA-RC4-SHA (128/128 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id DBF551A0848 for ; Tue, 20 May 2014 12:35:47 +1000 (EST) Received: by mail-pb0-f41.google.com with SMTP id uo5so6646794pbc.28 for ; Mon, 19 May 2014 19:35:44 -0700 (PDT) Date: Mon, 19 May 2014 19:34:27 -0700 (PDT) From: Hugh Dickins To: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <87d2f9jlpd.fsf@rustcorp.com.au> Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <87d2f9jlpd.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Cc: linux-arch@vger.kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, akpm@linux-foundation.org, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Tue, 20 May 2014, Rusty Russell wrote: > Hugh Dickins writes: > >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > >> > > >> > Perhaps we try to generalize from two data points (a slight improvement > >> > over doing it from 1!), eg: > >> > > >> > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > >> > unsigned int fault_around_order __read_mostly = > >> > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > > > Rusty's bimodal answer doesn't seem the right starting point to me. > > ? It's not bimodal, it's graded. I think you misread? 
Yikes, worse than misread, more like I was too rude even to read: sorry! Hugh From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ozlabs.org (ozlabs.org [103.22.144.67]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 9D0A51A084C for ; Tue, 20 May 2014 17:17:07 +1000 (EST) From: Rusty Russell To: "Kirill A. Shutemov" , Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <20140520004429.E660AE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> Date: Tue, 20 May 2014 15:52:07 +0930 Message-ID: <87oaythsvk.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Cc: linux-arch@vger.kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , "Kirill A. Shutemov" writes: > Andrew Morton wrote: >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >> >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >> > the order of the fault-around size in bytes, and fault_around_pages() >> > use 1UL << (fault_around_order - PAGE_SHIFT) >> >> Yes. And shame on me for missing it (this time!) at review. >> >> There's still time to fix this. Patches, please. > > Here it is. Made at 3.30 AM, build tested only. 
Prefer on top of Maddy's patch which makes it always a variable, rather than CONFIG_DEBUG_FS. It's got enough hair as it is. Cheers, Rusty. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) by lists.ozlabs.org (Postfix) with ESMTP id 954FF1A084D for ; Tue, 20 May 2014 17:28:57 +1000 (EST) Date: Tue, 20 May 2014 00:28:34 -0700 From: Andrew Morton To: Madhavan Srinivasan Subject: Re: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries Message-Id: <20140520002834.aefb5a90.akpm@linux-foundation.org> In-Reply-To: <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Cc: linux-arch@vger.kernel.org, riel@redhat.com, rusty@rustcorp.com.au, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Thu, 8 May 2014 14:58:16 +0530 Madhavan Srinivasan wrote: > --- a/arch/powerpc/platforms/pseries/pseries.h > +++ b/arch/powerpc/platforms/pseries/pseries.h > @@ -17,6 +17,8 @@ struct device_node; > extern void request_event_sources_irqs(struct device_node *np, > irq_handler_t handler, const char *name); > > +extern unsigned int fault_around_order; This isn't an appropriate header file for exporting something from core mm - what happens if arch/mn10300 wants it? I guess include/linux/mm.h is the place. 
From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) by lists.ozlabs.org (Postfix) with ESMTP id 0454D1A0816 for ; Tue, 20 May 2014 17:32:24 +1000 (EST) Date: Tue, 20 May 2014 00:32:01 -0700 From: Andrew Morton To: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140520003201.a2360d5d.akpm@linux-foundation.org> In-Reply-To: <87oaythsvk.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Cc: linux-arch@vger.kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Tue, 20 May 2014 15:52:07 +0930 Rusty Russell wrote: > "Kirill A. Shutemov" writes: > > Andrew Morton wrote: > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >> > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >> > the order of the fault-around size in bytes, and fault_around_pages() > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > >> > >> Yes. And shame on me for missing it (this time!) at review. > >> > >> There's still time to fix this. Patches, please. > > > > Here it is. Made at 3.30 AM, build tested only. 
> > Prefer on top of Maddy's patch which makes it always a variable, rather > than CONFIG_DEBUG_FS. It's got enough hair as it is. > We're at 3.15-rc5 and this interface should be finalised for 3.16. So Kirill's patch is pretty urgent and should come first. Well. It's only a debugfs interface at this stage so we are allowed to change it later, but it's better not to. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp03.in.ibm.com (e28smtp03.in.ibm.com [122.248.162.3]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id AF04D1A084F for ; Tue, 20 May 2014 17:53:16 +1000 (EST) Received: from /spool/local by e28smtp03.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 20 May 2014 13:23:12 +0530 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by d28dlp02.in.ibm.com (Postfix) with ESMTP id 54FAB394004C for ; Tue, 20 May 2014 13:23:08 +0530 (IST) Received: from d28av03.in.ibm.com (d28av03.in.ibm.com [9.184.220.65]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4K7rIEg2097586 for ; Tue, 20 May 2014 13:23:18 +0530 Received: from d28av03.in.ibm.com (localhost [127.0.0.1]) by d28av03.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4K7r6O7005316 for ; Tue, 20 May 2014 13:23:07 +0530 Message-ID: <537B09DF.1090906@linux.vnet.ibm.com> Date: Tue, 20 May 2014 13:23:03 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 To: Andrew Morton , Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> 
<20140520003201.a2360d5d.akpm@linux-foundation.org> In-Reply-To: <20140520003201.a2360d5d.akpm@linux-foundation.org> Content-Type: text/plain; charset=ISO-8859-1 Cc: linux-arch@vger.kernel.org, riel@redhat.com, ak@linux.intel.com, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Tuesday 20 May 2014 01:02 PM, Andrew Morton wrote: > On Tue, 20 May 2014 15:52:07 +0930 Rusty Russell wrote: > >> "Kirill A. Shutemov" writes: >>> Andrew Morton wrote: >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>> >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>> >>>> Yes. And shame on me for missing it (this time!) at review. >>>> >>>> There's still time to fix this. Patches, please. >>> >>> Here it is. Made at 3.30 AM, build tested only. >> >> Prefer on top of Maddy's patch which makes it always a variable, rather >> than CONFIG_DEBUG_FS. It's got enough hair as it is. >> > > We're at 3.15-rc5 and this interface should be finalised for 3.16. So > Kirrill's patch is pretty urgent and should come first. > > Well. It's only a debugfs interface at this stage so we are allowed to > change it later, but it's better not to. > My patchset does not change the interface, but uses the current fault around order variable from CONFIG_DEBUG_FS block to allow changes at runtime, instead of having a constant and some cleanup. 
Thanks for review Regards --Maddy From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp09.in.ibm.com (e28smtp09.in.ibm.com [122.248.162.9]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 078181A0852 for ; Tue, 20 May 2014 18:04:06 +1000 (EST) Received: from /spool/local by e28smtp09.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 20 May 2014 13:34:03 +0530 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by d28dlp03.in.ibm.com (Postfix) with ESMTP id C192A125804D for ; Tue, 20 May 2014 13:33:08 +0530 (IST) Received: from d28av03.in.ibm.com (d28av03.in.ibm.com [9.184.220.65]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4K84Cu1196912 for ; Tue, 20 May 2014 13:34:12 +0530 Received: from d28av03.in.ibm.com (localhost [127.0.0.1]) by d28av03.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4K840nC023115 for ; Tue, 20 May 2014 13:34:01 +0530 Message-ID: <537B0C6E.4030501@linux.vnet.ibm.com> Date: Tue, 20 May 2014 13:33:58 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 To: Andrew Morton Subject: Re: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> <20140520002834.aefb5a90.akpm@linux-foundation.org> In-Reply-To: <20140520002834.aefb5a90.akpm@linux-foundation.org> Content-Type: text/plain; charset=ISO-8859-1 Cc: linux-arch@vger.kernel.org, riel@redhat.com, rusty@rustcorp.com.au, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, kirill.shutemov@linux.intel.com List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , 
List-Archive: List-Post: List-Help: List-Subscribe: , On Tuesday 20 May 2014 12:58 PM, Andrew Morton wrote: > On Thu, 8 May 2014 14:58:16 +0530 Madhavan Srinivasan wrote: > >> --- a/arch/powerpc/platforms/pseries/pseries.h >> +++ b/arch/powerpc/platforms/pseries/pseries.h >> @@ -17,6 +17,8 @@ struct device_node; >> extern void request_event_sources_irqs(struct device_node *np, >> irq_handler_t handler, const char *name); >> >> +extern unsigned int fault_around_order; > > This isn't an appropriate header file for exporting something from core > mm - what happens if arch/mn10300 wants it?. > > I guess include/linux/mm.h is the place. > Rusty already suggested this. My bad. Reason for adding it here was that, I did the performance test for this platform. Will change and send it out. Thanks for review Regards Maddy From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by lists.ozlabs.org (Postfix) with ESMTP id 173A41A086B for ; Tue, 20 May 2014 20:27:47 +1000 (EST) From: "Kirill A. Shutemov" To: Rusty Russell In-Reply-To: <87oaythsvk.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140520102738.7F096E009B@blue.fi.intel.com> Date: Tue, 20 May 2014 13:27:38 +0300 (EEST) Cc: linux-arch@vger.kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, Andrew Morton , linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. 
Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Rusty Russell wrote: > "Kirill A. Shutemov" writes: > > Andrew Morton wrote: > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >> > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >> > the order of the fault-around size in bytes, and fault_around_pages() > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > >> > >> Yes. And shame on me for missing it (this time!) at review. > >> > >> There's still time to fix this. Patches, please. > > > > Here it is. Made at 3.30 AM, build tested only. > > Prefer on top of Maddy's patch which makes it always a variable, rather > than CONFIG_DEBUG_FS. It's got enough hair as it is. Something like this? From: "Kirill A. Shutemov" Date: Tue, 20 May 2014 13:02:03 +0300 Subject: [PATCH] mm: nominate faultaround area in bytes rather than page order There is evidence that the faultaround feature is less relevant on architectures with page size bigger than 4k. Which makes sense since page fault overhead per byte of mapped area should be less there. Let's rework the feature to specify faultaround area in bytes instead of page order. It's 64 kilobytes for now. The patch effectively disables faultaround on architectures with page size >= 64k (like ppc64). It's possible that some other size of faultaround area is relevant for a platform. We can expose `fault_around_bytes' variable to arch-specific code once such platforms are found. Signed-off-by: Kirill A. 
Shutemov --- mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 037b812a9531..252b319e8cdf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +static unsigned long fault_around_bytes = 65536; + +static inline unsigned long fault_around_pages(void) +{ + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; +} + +static inline unsigned long fault_around_mask(void) +{ + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; +} -#ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; -static int fault_around_order_get(void *data, u64 *val) +#ifdef CONFIG_DEBUG_FS +static int fault_around_bytes_get(void *data, u64 *val) { - *val = fault_around_order; + *val = fault_around_bytes; return 0; } -static int fault_around_order_set(void *data, u64 val) +static int fault_around_bytes_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); - if (1UL << val > PTRS_PER_PTE) + if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; - fault_around_order = val; + fault_around_bytes = val; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, - fault_around_order_get, fault_around_order_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, - &fault_around_order_fops); + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, + &fault_around_bytes_fops); if (!ret) - pr_warn("Failed to create fault_around_order in debugfs"); + pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } 
late_initcall(fault_around_debugfs); - -static inline unsigned long fault_around_pages(void) -{ - return 1UL << fault_around_order; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); -} -#else -static inline unsigned long fault_around_pages(void) -{ - unsigned long nr_pages; - - nr_pages = 1UL << FAULT_AROUND_ORDER; - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); - return nr_pages; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); -} #endif static void do_fault_around(struct vm_area_struct *vma, unsigned long address, @@ -3515,7 +3499,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) by lists.ozlabs.org (Postfix) with ESMTP id 937001A086F for ; Wed, 21 May 2014 05:59:58 +1000 (EST) Date: Tue, 20 May 2014 12:59:56 -0700 From: Andrew Morton To: "Kirill A. 
Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> In-Reply-To: <20140520102738.7F096E009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Cc: linux-arch@vger.kernel.org, x86@kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Tue, 20 May 2014 13:27:38 +0300 (EEST) "Kirill A. Shutemov" wrote: > Rusty Russell wrote: > > "Kirill A. Shutemov" writes: > > > Andrew Morton wrote: > > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > >> > > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > >> > the order of the fault-around size in bytes, and fault_around_pages() > > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > > >> > > >> Yes. And shame on me for missing it (this time!) at review. > > >> > > >> There's still time to fix this. Patches, please. > > > > > > Here it is. Made at 3.30 AM, build tested only. > > > > Prefer on top of Maddy's patch which makes it always a variable, rather > > than CONFIG_DEBUG_FS. It's got enough hair as it is. > > Something like this? This appears to be against mainline, not against Madhavan's patch. 
As mentioned previously, I'd prefer it that way, but I'm confused. > From: "Kirill A. Shutemov" > Date: Tue, 20 May 2014 13:02:03 +0300 > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > There are evidences that faultaround feature is less relevant on > architectures with page size bigger then 4k. Which makes sense since > page fault overhead per byte of mapped area should be less there. > > Let's rework the feature to specify faultaround area in bytes instead of > page order. It's 64 kilobytes for now. > > The patch effectively disables faultaround on architectures with > page size >= 64k (like ppc64). > > It's possible that some other size of faultaround area is relevant for a > platform. We can expose `fault_around_bytes' variable to arch-specific > code once such platforms will be found. > > Signed-off-by: Kirill A. Shutemov > --- > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > 1 file changed, 23 insertions(+), 39 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 037b812a9531..252b319e8cdf 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > update_mmu_cache(vma, address, pte); > } > > -#define FAULT_AROUND_ORDER 4 > +static unsigned long fault_around_bytes = 65536; > + > +static inline unsigned long fault_around_pages(void) > +{ > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > +} I think we should round up, not down. So if the user asks for 1kb, they get one page. So this becomes return PAGE_ALIGN(fault_around_bytes) / PAGE_SIZE; > +static inline unsigned long fault_around_mask(void) > +{ > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > +} And this has me a bit stumped. It's not helpful that do_fault_around() is undocumented. Does it fault in N/2 pages ahead and N/2 pages behind?
Or does it align the address down to the highest multiple of fault_around_bytes? It appears to be the latter, so the location of the faultaround window around the fault address is basically random, depending on what address userspace happened to pick. I don't know why we did this :( Or something. Can we please get some code commentary over do_fault_around() describing this design decision and explaining the reasoning behind it? Also, "neast" is not a word. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by lists.ozlabs.org (Postfix) with ESMTP id 065EC1A081F for ; Wed, 21 May 2014 23:40:41 +1000 (EST) From: "Kirill A. Shutemov" To: Andrew Morton In-Reply-To: <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140521134027.263DDE009B@blue.fi.intel.com> Date: Wed, 21 May 2014 16:40:27 +0300 (EEST) Cc: linux-arch@vger.kernel.org, x86@kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Andrew Morton wrote: > On Tue, 20 May 2014 13:27:38 +0300 (EEST) "Kirill A. 
Shutemov" wrote: > > > Rusty Russell wrote: > > > "Kirill A. Shutemov" writes: > > > > Andrew Morton wrote: > > > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > > >> > > > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > > >> > the order of the fault-around size in bytes, and fault_around_pages() > > > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > > > >> > > > >> Yes. And shame on me for missing it (this time!) at review. > > > >> > > > >> There's still time to fix this. Patches, please. > > > > > > > > Here it is. Made at 3.30 AM, build tested only. > > > > > > Prefer on top of Maddy's patch which makes it always a variable, rather > > > than CONFIG_DEBUG_FS. It's got enough hair as it is. > > > > Something like this? > > This appears to be against mainline, not against Madhavan's patch. As > mentioned previously, I'd prefer it that way but confused. > > > > From: "Kirill A. Shutemov" > > Date: Tue, 20 May 2014 13:02:03 +0300 > > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > > > There are evidences that faultaround feature is less relevant on > > architectures with page size bigger then 4k. Which makes sense since > > page fault overhead per byte of mapped area should be less there. > > > > Let's rework the feature to specify faultaround area in bytes instead of > > page order. It's 64 kilobytes for now. > > > > The patch effectively disables faultaround on architectures with > > page size >= 64k (like ppc64). > > > > It's possible that some other size of faultaround area is relevant for a > > platform. We can expose `fault_around_bytes' variable to arch-specific > > code once such platforms will be found. > > > > Signed-off-by: Kirill A. 
Shutemov > > --- > > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > > 1 file changed, 23 insertions(+), 39 deletions(-) > > > > diff --git a/mm/memory.c b/mm/memory.c > > index 037b812a9531..252b319e8cdf 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > > update_mmu_cache(vma, address, pte); > > } > > > > -#define FAULT_AROUND_ORDER 4 > > +static unsigned long fault_around_bytes = 65536; > > + > > +static inline unsigned long fault_around_pages(void) > > +{ > > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > > +} > > I think we should round up, not down. So if the user asks for 1kb, > they get one page. > > So this becomes > > return PAGE_ALIGN(fault_around_bytes) / PAGE_SIZE; See below. > > +static inline unsigned long fault_around_mask(void) > > +{ > > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > > +} > > And this has me a bit stumped. It's not helpful that do_fault_around() > is undocumented. Does it fault in N/2 pages ahead and N/2 pages > behind? Or does it align the address down to the highest multiple of > fault_around_bytes? It appears to be the latter, so the location of > the faultaround window around the fault address is basically random, > depending on what address userspace happened to pick. I don't know why > we did this :( When we call ->map_pages() we need to make sure that we stay within VMA and the page table. We don't want to cross page table boundary, because page table is what ptlock covers in split ptlock case. I've designed the feature with fault area nominated in page order in mind and I found it's easier to make sure we don't cross boundaries, if we would align virtual address of fault around area to PAGE_SIZE << FAULT_AROUND_ORDER. And yes fault address may be anywhere within the area. 
You can think about this as a virtual page with size PAGE_SIZE << FAULT_AROUND_ORDER: no matter what the fault address is, we handle the area, naturally aligned to that page size, which the fault address belongs to. I've used rounddown_pow_of_two() in the patch to align to nearest page order, not to page size, because that's what current do_fault_around() expects to see. And roundup is not an option: nobody expects fault around area to be 128k if fault_around_bytes is set to 64k + 1 bytes. If you think we need this I can rework do_fault_around() to handle non-pow-of-two fault_around_pages(), but I don't think it's a good idea to do this for v3.15. Anyway, patch I've proposed allows changing fault_around_bytes only from DEBUG_FS and rounddown should be good enough there. > Or something. Can we please get some code commentary over > do_fault_around() describing this design decision and explaining the > reasoning behind it? I'll do this. But if do_fault_around() rework is needed, I want to do that first. > Also, "neast" is not a word. :facepalm: From: "Kirill A. Shutemov" Date: Wed, 21 May 2014 16:36:42 +0300 Subject: [PATCH] mm: fix typo in comment in do_fault_around() Signed-off-by: Kirill A. Shutemov --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 252b319e8cdf..f76663c31da6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3460,7 +3460,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, /* * max_pgoff is either end of page table or end of vma - * or fault_around_pages() from pgoff, depending what is neast. + * or fault_around_pages() from pgoff, depending what is nearest. */ max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; -- Kirill A.
Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) by lists.ozlabs.org (Postfix) with ESMTP id 706FB1A0814 for ; Thu, 22 May 2014 06:34:11 +1000 (EST) Date: Wed, 21 May 2014 13:34:08 -0700 From: Andrew Morton To: "Kirill A. Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> In-Reply-To: <20140521134027.263DDE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> <20140521134027.263DDE009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Cc: linux-arch@vger.kernel.org, x86@kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Wed, 21 May 2014 16:40:27 +0300 (EEST) "Kirill A. Shutemov" wrote: > > Or something. Can we please get some code commentary over > > do_fault_around() describing this design decision and explaining the > > reasoning behind it? > > I'll do this. But if do_fault_around() rework is needed, I want to do that > first. This sort of thing should be at least partially driven by observation and I don't have the data for that. 
My seat of the pants feel is that after the first fault, accesses at higher addresses are more common/probable than accesses at lower addresses. In which case we should see improvements by centering the window at some higher address than the fault. Much instrumentation and downstream analysis is needed and the returns will be pretty small! But we don't need to do all that right now. Let's get the current implementation wrapped up for 3.15: get the interface finalized (bytes, not pages!) and get the current design decisions appropriately documented. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by lists.ozlabs.org (Postfix) with ESMTP id 2927B1A007D for ; Fri, 23 May 2014 22:41:39 +1000 (EST) From: "Kirill A. Shutemov" To: Andrew Morton In-Reply-To: <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> <20140521134027.263DDE009B@blue.fi.intel.com> <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140523122854.BDB36E009B@blue.fi.intel.com> Date: Fri, 23 May 2014 15:28:54 +0300 (EEST) Cc: linux-arch@vger.kernel.org, x86@kernel.org, riel@redhat.com, Madhavan Srinivasan , dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. 
Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Andrew Morton wrote: > On Wed, 21 May 2014 16:40:27 +0300 (EEST) "Kirill A. Shutemov" wrote: > > > > Or something. Can we please get some code commentary over > > > do_fault_around() describing this design decision and explaining the > > > reasoning behind it? > > > > I'll do this. But if do_fault_around() rework is needed, I want to do that > > first. > > This sort of thing should be at least partially driven by observation > and I don't have the data for that. My seat of the pants feel is that > after the first fault, accesses at higher addresses are more > common/probable than accesses at lower addresses. It's probably true for data, but the feature is mostly targeted to code pages and situation is not that obvious to me with all jumps. > But we don't need to do all that right now. Let's get the current > implementation wrapped up for 3.15: get the interface finalized (bytes, > not pages!) The patch above by thread is okay for that, right? > and get the current design decisions appropriately documented. Here it is. Based on patch to convert order->bytes. From: "Kirill A. Shutemov" Date: Fri, 23 May 2014 15:16:47 +0300 Subject: [PATCH] mm: document do_fault_around() feature Some clarification on how faultaround works. Signed-off-by: Kirill A. Shutemov --- mm/memory.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 252b319e8cdf..8d723b8d3c86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3404,6 +3404,10 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, static unsigned long fault_around_bytes = 65536; +/* + * fault_around_pages() and fault_around_mask() round down fault_around_bytes + * to nearest page order. It's what do_fault_around() expects to see. 
+ */ static inline unsigned long fault_around_pages(void) { return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; @@ -3445,6 +3449,29 @@ static int __init fault_around_debugfs(void) late_initcall(fault_around_debugfs); #endif +/* + * do_fault_around() tries to map a few pages around the fault address. The hope + * is that the pages will be needed soon and this would lower the number of + * faults to handle. + * + * It uses vm_ops->map_pages() to map the pages, which skips the page if it's + * not ready to be mapped: not up-to-date, locked, etc. + * + * This function is called with the page table lock taken. In the split ptlock + * case the page table lock protects only those entries which belong to + * the page table corresponding to the fault address. + * + * This function doesn't cross the VMA boundaries in order to call map_pages() + * only once. + * + * fault_around_pages() defines how many pages we'll try to map. + * do_fault_around() expects it to be a power of two and less than or equal to + * PTRS_PER_PTE. + * + * The virtual address of the area that we map is naturally aligned to the + * fault_around_pages() (and therefore to page order). This way it's easier to + * guarantee that we don't cross the page table boundaries. + */ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pgoff_t pgoff, unsigned int flags) { -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp09.in.ibm.com (e28smtp09.in.ibm.com [122.248.162.9]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id C26161A0591 for ; Tue, 27 May 2014 16:25:05 +1000 (EST) Received: from /spool/local by e28smtp09.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only!
Violators will be prosecuted for from ; Tue, 27 May 2014 11:54:58 +0530 Received: from d28relay02.in.ibm.com (d28relay02.in.ibm.com [9.184.220.59]) by d28dlp02.in.ibm.com (Postfix) with ESMTP id E07C3394004C for ; Tue, 27 May 2014 11:54:52 +0530 (IST) Received: from d28av04.in.ibm.com (d28av04.in.ibm.com [9.184.220.66]) by d28relay02.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4R6PPYC48890036 for ; Tue, 27 May 2014 11:55:25 +0530 Received: from d28av04.in.ibm.com (localhost [127.0.0.1]) by d28av04.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4R6OoDH026443 for ; Tue, 27 May 2014 11:54:51 +0530 Message-ID: <53842FB1.7090909@linux.vnet.ibm.com> Date: Tue, 27 May 2014 11:54:49 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 To: "Kirill A. Shutemov" , Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> In-Reply-To: <20140520102738.7F096E009B@blue.fi.intel.com> Content-Type: text/plain; charset=ISO-8859-1 Cc: linux-arch@vger.kernel.org, riel@redhat.com, ak@linux.intel.com, dave.hansen@intel.com, peterz@infradead.org, x86@kernel.org, Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, paulus@samba.org, mgorman@suse.de, Andrew Morton , linuxppc-dev@lists.ozlabs.org, mingo@kernel.org List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: > Rusty Russell wrote: >> "Kirill A. 
Shutemov" writes: >>> Andrew Morton wrote: >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>> >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>> >>>> Yes. And shame on me for missing it (this time!) at review. >>>> >>>> There's still time to fix this. Patches, please. >>> >>> Here it is. Made at 3.30 AM, build tested only. >> >> Prefer on top of Maddy's patch which makes it always a variable, rather >> than CONFIG_DEBUG_FS. It's got enough hair as it is. > > Something like this? > > From: "Kirill A. Shutemov" > Date: Tue, 20 May 2014 13:02:03 +0300 > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > There are evidences that faultaround feature is less relevant on > architectures with page size bigger then 4k. Which makes sense since > page fault overhead per byte of mapped area should be less there. > > Let's rework the feature to specify faultaround area in bytes instead of > page order. It's 64 kilobytes for now. > > The patch effectively disables faultaround on architectures with > page size >= 64k (like ppc64). > > It's possible that some other size of faultaround area is relevant for a > platform. We can expose `fault_around_bytes' variable to arch-specific > code once such platforms will be found. > > Signed-off-by: Kirill A. 
Shutemov > --- > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > 1 file changed, 23 insertions(+), 39 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 037b812a9531..252b319e8cdf 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > update_mmu_cache(vma, address, pte); > } > > -#define FAULT_AROUND_ORDER 4 > +static unsigned long fault_around_bytes = 65536; > + > +static inline unsigned long fault_around_pages(void) > +{ > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > +} > + > +static inline unsigned long fault_around_mask(void) > +{ > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > +} > > -#ifdef CONFIG_DEBUG_FS > -static unsigned int fault_around_order = FAULT_AROUND_ORDER; > > -static int fault_around_order_get(void *data, u64 *val) > +#ifdef CONFIG_DEBUG_FS > +static int fault_around_bytes_get(void *data, u64 *val) > { > - *val = fault_around_order; > + *val = fault_around_bytes; > return 0; > } > > -static int fault_around_order_set(void *data, u64 val) > +static int fault_around_bytes_set(void *data, u64 val) > { Kindly ignore the question if not relevant. Even though we need root access to alter the value, will we be fine with negative value?. 
Regards Maddy > - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); > - if (1UL << val > PTRS_PER_PTE) > + if (val / PAGE_SIZE > PTRS_PER_PTE) > return -EINVAL; > - fault_around_order = val; > + fault_around_bytes = val; > return 0; > } > -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, > - fault_around_order_get, fault_around_order_set, "%llu\n"); > +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, > + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); > > static int __init fault_around_debugfs(void) > { > void *ret; > > - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, > - &fault_around_order_fops); > + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, > + &fault_around_bytes_fops); > if (!ret) > - pr_warn("Failed to create fault_around_order in debugfs"); > + pr_warn("Failed to create fault_around_bytes in debugfs"); > return 0; > } > late_initcall(fault_around_debugfs); > - > -static inline unsigned long fault_around_pages(void) > -{ > - return 1UL << fault_around_order; > -} > - > -static inline unsigned long fault_around_mask(void) > -{ > - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); > -} > -#else > -static inline unsigned long fault_around_pages(void) > -{ > - unsigned long nr_pages; > - > - nr_pages = 1UL << FAULT_AROUND_ORDER; > - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); > - return nr_pages; > -} > - > -static inline unsigned long fault_around_mask(void) > -{ > - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); > -} > #endif > > static void do_fault_around(struct vm_area_struct *vma, unsigned long address, > @@ -3515,7 +3499,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, > * if page by the offset is not ready to be mapped (cold cache or > * something). 
> */ > - if (vma->vm_ops->map_pages) { > + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { > pte = pte_offset_map_lock(mm, pmd, address, &ptl); > do_fault_around(vma, address, pte, pgoff, flags); > if (!pte_same(*pte, orig_pte)) > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from e28smtp03.in.ibm.com (e28smtp03.in.ibm.com [122.248.162.3]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 05D9B1A05B7 for ; Tue, 27 May 2014 20:44:20 +1000 (EST) Received: from /spool/local by e28smtp03.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 27 May 2014 16:14:13 +0530 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by d28dlp02.in.ibm.com (Postfix) with ESMTP id EF422394005E for ; Tue, 27 May 2014 16:14:09 +0530 (IST) Received: from d28av04.in.ibm.com (d28av04.in.ibm.com [9.184.220.66]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4RAiLNP66322432 for ; Tue, 27 May 2014 16:14:21 +0530 Received: from d28av04.in.ibm.com (localhost [127.0.0.1]) by d28av04.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4RAi6ot011179 for ; Tue, 27 May 2014 16:14:09 +0530 Message-ID: <53846C75.10507@linux.vnet.ibm.com> Date: Tue, 27 May 2014 16:14:05 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 To: "Kirill A. 
Shutemov" Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <53842FB1.7090909@linux.vnet.ibm.com> <20140527102200.012BBE009B@blue.fi.intel.com> In-Reply-To: <20140527102200.012BBE009B@blue.fi.intel.com> Content-Type: text/plain; charset=ISO-8859-1 Cc: linux-arch@vger.kernel.org, riel@redhat.com, x86@kernel.org, dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, Andrew Morton , linuxppc-dev@lists.ozlabs.org, mingo@kernel.org List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , On Tuesday 27 May 2014 03:51 PM, Kirill A. Shutemov wrote: > Madhavan Srinivasan wrote: >> On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: >>> Rusty Russell wrote: >>>> "Kirill A. Shutemov" writes: >>>>> Andrew Morton wrote: >>>>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>>>> >>>>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>>>> >>>>>> Yes. And shame on me for missing it (this time!) at review. >>>>>> >>>>>> There's still time to fix this. Patches, please. >>>>> >>>>> Here it is. Made at 3.30 AM, build tested only. >>>> >>>> Prefer on top of Maddy's patch which makes it always a variable, rather >>>> than CONFIG_DEBUG_FS. It's got enough hair as it is. >>> >>> Something like this? >>> >>> From: "Kirill A. 
Shutemov" >>> Date: Tue, 20 May 2014 13:02:03 +0300 >>> Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order >>> >>> There are evidences that faultaround feature is less relevant on >>> architectures with page size bigger then 4k. Which makes sense since >>> page fault overhead per byte of mapped area should be less there. >>> >>> Let's rework the feature to specify faultaround area in bytes instead of >>> page order. It's 64 kilobytes for now. >>> >>> The patch effectively disables faultaround on architectures with >>> page size >= 64k (like ppc64). >>> >>> It's possible that some other size of faultaround area is relevant for a >>> platform. We can expose `fault_around_bytes' variable to arch-specific >>> code once such platforms will be found. >>> >>> Signed-off-by: Kirill A. Shutemov >>> --- >>> mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- >>> 1 file changed, 23 insertions(+), 39 deletions(-) >>> >>> diff --git a/mm/memory.c b/mm/memory.c >>> index 037b812a9531..252b319e8cdf 100644 >>> --- a/mm/memory.c >>> +++ b/mm/memory.c >>> @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, >>> update_mmu_cache(vma, address, pte); >>> } >>> >>> -#define FAULT_AROUND_ORDER 4 >>> +static unsigned long fault_around_bytes = 65536; >>> + >>> +static inline unsigned long fault_around_pages(void) >>> +{ >>> + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; >>> +} >>> + >>> +static inline unsigned long fault_around_mask(void) >>> +{ >>> + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; >>> +} >>> >>> -#ifdef CONFIG_DEBUG_FS >>> -static unsigned int fault_around_order = FAULT_AROUND_ORDER; >>> >>> -static int fault_around_order_get(void *data, u64 *val) >>> +#ifdef CONFIG_DEBUG_FS >>> +static int fault_around_bytes_get(void *data, u64 *val) >>> { >>> - *val = fault_around_order; >>> + *val = fault_around_bytes; >>> return 0; >>> } >>> >>> -static int 
fault_around_order_set(void *data, u64 val) >>> +static int fault_around_bytes_set(void *data, u64 val) >>> { >> >> Kindly ignore the question if not relevant. Even though we need root >> access to alter the value, will we be fine with >> negative value?. > ppc > val is u64. or I miss something? > My Bad. What I wanted to check was for all 0xf input and guess we are fine. Sorry about that. Regards Maddy From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by lists.ozlabs.org (Postfix) with ESMTP id 765901A05B7 for ; Tue, 27 May 2014 20:22:08 +1000 (EST) From: "Kirill A. Shutemov" To: Madhavan Srinivasan In-Reply-To: <53842FB1.7090909@linux.vnet.ibm.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <53842FB1.7090909@linux.vnet.ibm.com> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140527102200.012BBE009B@blue.fi.intel.com> Date: Tue, 27 May 2014 13:21:59 +0300 (EEST) Cc: linux-arch@vger.kernel.org, riel@redhat.com, x86@kernel.org, dave.hansen@intel.com, peterz@infradead.org, Rusty Russell , Hugh Dickins , linux-kernel@vger.kernel.org, linux-mm@kvack.org, ak@linux.intel.com, paulus@samba.org, mgorman@suse.de, Andrew Morton , linuxppc-dev@lists.ozlabs.org, mingo@kernel.org, "Kirill A. Shutemov" List-Id: Linux on PowerPC Developers Mail List List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Madhavan Srinivasan wrote: > On Tuesday 20 May 2014 03:57 PM, Kirill A. Shutemov wrote: > > Rusty Russell wrote: > >> "Kirill A. 
Shutemov" writes: > >>> Andrew Morton wrote: > >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >>>> > >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >>>>> the order of the fault-around size in bytes, and fault_around_pages() > >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) > >>>> > >>>> Yes. And shame on me for missing it (this time!) at review. > >>>> > >>>> There's still time to fix this. Patches, please. > >>> > >>> Here it is. Made at 3.30 AM, build tested only. > >> > >> Prefer on top of Maddy's patch which makes it always a variable, rather > >> than CONFIG_DEBUG_FS. It's got enough hair as it is. > > > > Something like this? > > > > From: "Kirill A. Shutemov" > > Date: Tue, 20 May 2014 13:02:03 +0300 > > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > > > There are evidences that faultaround feature is less relevant on > > architectures with page size bigger then 4k. Which makes sense since > > page fault overhead per byte of mapped area should be less there. > > > > Let's rework the feature to specify faultaround area in bytes instead of > > page order. It's 64 kilobytes for now. > > > > The patch effectively disables faultaround on architectures with > > page size >= 64k (like ppc64). > > > > It's possible that some other size of faultaround area is relevant for a > > platform. We can expose `fault_around_bytes' variable to arch-specific > > code once such platforms will be found. > > > > Signed-off-by: Kirill A. 
Shutemov > > --- > > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > > 1 file changed, 23 insertions(+), 39 deletions(-) > > > > diff --git a/mm/memory.c b/mm/memory.c > > index 037b812a9531..252b319e8cdf 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > > update_mmu_cache(vma, address, pte); > > } > > > > -#define FAULT_AROUND_ORDER 4 > > +static unsigned long fault_around_bytes = 65536; > > + > > +static inline unsigned long fault_around_pages(void) > > +{ > > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > > +} > > + > > +static inline unsigned long fault_around_mask(void) > > +{ > > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > > +} > > > > -#ifdef CONFIG_DEBUG_FS > > -static unsigned int fault_around_order = FAULT_AROUND_ORDER; > > > > -static int fault_around_order_get(void *data, u64 *val) > > +#ifdef CONFIG_DEBUG_FS > > +static int fault_around_bytes_get(void *data, u64 *val) > > { > > - *val = fault_around_order; > > + *val = fault_around_bytes; > > return 0; > > } > > > > -static int fault_around_order_set(void *data, u64 val) > > +static int fault_around_bytes_set(void *data, u64 val) > > { > > Kindly ignore the question if not relevant. Even though we need root > access to alter the value, will we be fine with > negative value?. val is u64. or I miss something? -- Kirill A. Shutemov From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f54.google.com (mail-pa0-f54.google.com [209.85.220.54]) by kanga.kvack.org (Postfix) with ESMTP id 2FA7C6B0038 for ; Mon, 19 May 2014 19:43:04 -0400 (EDT) Received: by mail-pa0-f54.google.com with SMTP id bj1so6523962pad.27 for ; Mon, 19 May 2014 16:43:03 -0700 (PDT) Received: from mail.linuxfoundation.org (mail.linuxfoundation.org. 
[140.211.169.12]) by mx.google.com with ESMTP id vx5si21505845pab.104.2014.05.19.16.43.03 for ; Mon, 19 May 2014 16:43:03 -0700 (PDT) Date: Mon, 19 May 2014 16:43:01 -0700 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Hugh Dickins Cc: Madhavan Srinivasan , Rusty Russell , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) Yes. And shame on me for missing it (this time!) at review. There's still time to fix this. Patches, please. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f177.google.com (mail-pd0-f177.google.com [209.85.192.177]) by kanga.kvack.org (Postfix) with ESMTP id EA8DB6B0036 for ; Mon, 19 May 2014 22:07:06 -0400 (EDT) Received: by mail-pd0-f177.google.com with SMTP id g10so85346pdj.22 for ; Mon, 19 May 2014 19:07:06 -0700 (PDT) Received: from e23smtp08.au.ibm.com (e23smtp08.au.ibm.com. [202.81.31.141]) by mx.google.com with ESMTPS id xl4si22183148pab.5.2014.05.19.19.07.05 for (version=TLSv1 cipher=RC4-SHA bits=128/128); Mon, 19 May 2014 19:07:05 -0700 (PDT) Received: from /spool/local by e23smtp08.au.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 20 May 2014 12:07:00 +1000 Received: from d23relay03.au.ibm.com (d23relay03.au.ibm.com [9.190.235.21]) by d23dlp01.au.ibm.com (Postfix) with ESMTP id CDFA02CE8052 for ; Tue, 20 May 2014 12:06:56 +1000 (EST) Received: from d23av03.au.ibm.com (d23av03.au.ibm.com [9.190.234.97]) by d23relay03.au.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4K26fZE000478 for ; Tue, 20 May 2014 12:06:41 +1000 Received: from d23av03.au.ibm.com (localhost [127.0.0.1]) by d23av03.au.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4K26t4P018775 for ; Tue, 20 May 2014 12:06:55 +1000 Message-ID: <537AB8B2.3040000@linux.vnet.ibm.com> Date: Tue, 20 May 2014 07:36:42 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> In-Reply-To: Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Hugh Dickins Cc: Rusty Russell , "Kirill A. 
Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tuesday 20 May 2014 04:53 AM, Hugh Dickins wrote: > On Mon, 19 May 2014, Madhavan Srinivasan wrote: >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: >>> Hugh Dickins writes: >>>> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >>>>> >>>>> Hi Ingo, >>>>> >>>>> Do you have any comments for the latest version of the patchset. If >>>>> not, kindly can you pick it up as is. >>>>> >>>>> >>>>> With regards >>>>> Maddy >>>>> >>>>>> Kirill A. Shutemov with 8c6e50b029 commit introduced >>>>>> vm_ops->map_pages() for mapping easy accessible pages around >>>>>> fault address in hope to reduce number of minor page faults. >>>>>> >>>>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >>>>>> value using mm/Kconfig. This will enable architecture maintainers >>>>>> to decide on suitable FAULT_AROUND_ORDER value based on >>>>>> performance data for that architecture. First patch also defaults >>>>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >>>>>> out the performance numbers for powerpc (platform pseries) and >>>>>> initialize the fault around order variable for pseries platform of >>>>>> powerpc. >>>> >>>> Sorry for not commenting earlier - just reminded by this ping to Ingo. >>>> >>>> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >>>> >>>> arch/powerpc/Kconfig suggests that Power supports base page size of >>>> 4k, 16k, 64k or 256k. >>>> >>>> I would expect your optimal fault_around_order to depend very much on >>>> the base page size. >>> >>> It was 64k, which is what PPC64 uses on all the major distributions. >>> You really only get a choice of 4k and 64k with 64 bit power. 
>>> >> This is true. PPC64 support multiple pagesize and yes the default page >> size of 64k, is taken as base pagesize for the tests. >> >>>> Perhaps fault_around_size would provide a more useful default? >>> >>> That seems to fit. With 4k pages and order 4, you're asking for 64k. >>> Maddy's result shows 64k is also reasonable for 64k pages. >>> >>> Perhaps we try to generalize from two data points (a slight improvement >>> over doing it from 1!), eg: >>> >>> /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ >>> unsigned int fault_around_order __read_mostly = >>> (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > Rusty's bimodal answer doesn't seem the right starting point to me. > > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) > - when that doesn't wrap, of course! > > That would at least have a better chance of being appropriate for > architectures with 8k and 16k pages (Itanium springs to mind). > > Not necessarily right for them, since each architecture may have > different faulting overheads; but a better chance of being right > than blindly assuming 4k or 64k pages for everyone. > > I'd be glad to see that change go into v3.15: what do you think, > Kirill, are we too late to make such a change now? > Or do you see some objection to it? > >> This may be right. But these are the concerns, will not this make other >> arch to pick default without any tuning > > Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? > Did other architectures, with other page sizes, back that default? > Clearly not powerpc. Ok. > >> and also this will remove the >> compile time option to disable the feature? > > Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig > for v3.16? 
> > I'm not sure whether Rusty was arguing against that or not I think > we are all three concerned to have a more sensible default than what's > there at present. I don't feel very strongly about your Kconfig Added it as one way to reset or disable the default value. But then I guess we decided on having FAULT_AROUND_ORDER as a variable which is more important than Kconfig option. > option: I've no objection, if it were to default to byte order 16. > Thanks for review With regards Maddy > Hugh > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pb0-f49.google.com (mail-pb0-f49.google.com [209.85.160.49]) by kanga.kvack.org (Postfix) with ESMTP id 6E8D36B0036 for ; Mon, 19 May 2014 22:10:09 -0400 (EDT) Received: by mail-pb0-f49.google.com with SMTP id jt11so6555414pbb.22 for ; Mon, 19 May 2014 19:10:09 -0700 (PDT) Received: from ozlabs.org (ozlabs.org. [103.22.144.67]) by mx.google.com with ESMTPS id rx8si22152303pab.186.2014.05.19.19.10.07 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 19 May 2014 19:10:08 -0700 (PDT) From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> Date: Tue, 20 May 2014 10:44:06 +0930 Message-ID: <87d2f9jlpd.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Sender: owner-linux-mm@kvack.org List-ID: To: Hugh Dickins , Madhavan Srinivasan Cc: "Kirill A. 
Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Hugh Dickins writes: > On Mon, 19 May 2014, Madhavan Srinivasan wrote: >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: >> > Hugh Dickins writes: >> >> On Thu, 15 May 2014, Madhavan Srinivasan wrote: >> >>> >> >>> Hi Ingo, >> >>> >> >>> Do you have any comments for the latest version of the patchset. If >> >>> not, kindly can you pick it up as is. >> >>> >> >>> >> >>> With regards >> >>> Maddy >> >>> >> >>>> Kirill A. Shutemov with 8c6e50b029 commit introduced >> >>>> vm_ops->map_pages() for mapping easy accessible pages around >> >>>> fault address in hope to reduce number of minor page faults. >> >>>> >> >>>> This patch creates infrastructure to modify the FAULT_AROUND_ORDER >> >>>> value using mm/Kconfig. This will enable architecture maintainers >> >>>> to decide on suitable FAULT_AROUND_ORDER value based on >> >>>> performance data for that architecture. First patch also defaults >> >>>> FAULT_AROUND_ORDER Kconfig element to 4. Second patch list >> >>>> out the performance numbers for powerpc (platform pseries) and >> >>>> initialize the fault around order variable for pseries platform of >> >>>> powerpc. >> >> >> >> Sorry for not commenting earlier - just reminded by this ping to Ingo. >> >> >> >> I didn't study your numbers, but nowhere did I see what PAGE_SIZE you use. >> >> >> >> arch/powerpc/Kconfig suggests that Power supports base page size of >> >> 4k, 16k, 64k or 256k. >> >> >> >> I would expect your optimal fault_around_order to depend very much on >> >> the base page size. >> > >> > It was 64k, which is what PPC64 uses on all the major distributions. >> > You really only get a choice of 4k and 64k with 64 bit power. 
>> > >> This is true. PPC64 support multiple pagesize and yes the default page >> size of 64k, is taken as base pagesize for the tests. >> >> >> Perhaps fault_around_size would provide a more useful default? >> > >> > That seems to fit. With 4k pages and order 4, you're asking for 64k. >> > Maddy's result shows 64k is also reasonable for 64k pages. >> > >> > Perhaps we try to generalize from two data points (a slight improvement >> > over doing it from 1!), eg: >> > >> > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ >> > unsigned int fault_around_order __read_mostly = >> > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > Rusty's bimodal answer doesn't seem the right starting point to me. ? It's not bimodal, it's graded. I think you misread? > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > the order of the fault-around size in bytes, and fault_around_pages() > use 1UL << (fault_around_order - PAGE_SHIFT) > - when that doesn't wrap, of course! > > That would at least have a better chance of being appropriate for > architectures with 8k and 16k pages (Itanium springs to mind). Well, from our two data points it seems that we want to fault in 64k at a time whatever our page size. Perhaps it's clearer if the code expresses itself that way. > Wasn't FAULT_AROUND_ORDER 4 chosen solely on the basis of x86 4k pages? > Did other architectures, with other page sizes, back that default? > Clearly not powerpc. Yeah, BenH flagged it as "we should test this" for powerpc, which is what Maddy then did. >> and also this will remove the >> compile time option to disable the feature? > > Compile time option meaning your FAULT_AROUND_ORDER in mm/Kconfig > for v3.16? > > I'm not sure whether Rusty was arguing against that or not. I think > we are all three concerned to have a more sensible default than what's > there at present. 
I don't feel very strongly about your Kconfig > option: I've no objection, if it were to default to byte order 16. I don't mind either. Cheers, Rusty. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f178.google.com (mail-pd0-f178.google.com [209.85.192.178]) by kanga.kvack.org (Postfix) with ESMTP id 8F11C6B0036 for ; Mon, 19 May 2014 22:35:45 -0400 (EDT) Received: by mail-pd0-f178.google.com with SMTP id v10so107413pde.9 for ; Mon, 19 May 2014 19:35:45 -0700 (PDT) Received: from mail-pd0-x230.google.com (mail-pd0-x230.google.com [2607:f8b0:400e:c02::230]) by mx.google.com with ESMTPS id in10si22305207pac.127.2014.05.19.19.35.44 for (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Mon, 19 May 2014 19:35:44 -0700 (PDT) Received: by mail-pd0-f176.google.com with SMTP id p10so106098pdj.7 for ; Mon, 19 May 2014 19:35:44 -0700 (PDT) Date: Mon, 19 May 2014 19:34:27 -0700 (PDT) From: Hugh Dickins Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <87d2f9jlpd.fsf@rustcorp.com.au> Message-ID: References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <87d2f9jlpd.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Sender: owner-linux-mm@kvack.org List-ID: To: Rusty Russell Cc: Madhavan Srinivasan , "Kirill A. 
Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, akpm@linux-foundation.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tue, 20 May 2014, Rusty Russell wrote: > Hugh Dickins writes: > >> On Monday 19 May 2014 05:42 AM, Rusty Russell wrote: > >> > > >> > Perhaps we try to generalize from two data points (a slight improvement > >> > over doing it from 1!), eg: > >> > > >> > /* 4 seems good for 4k-page x86, 0 seems good for 64k page ppc64, so: */ > >> > unsigned int fault_around_order __read_mostly = > >> > (16 - PAGE_SHIFT < 0 ? 0 : 16 - PAGE_SHIFT); > > > > Rusty's bimodal answer doesn't seem the right starting point to me. > > ? It's not bimodal, it's graded. I think you misread? Yikes, worse than misread, more like I was too rude even to read: sorry! Hugh -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f54.google.com (mail-pa0-f54.google.com [209.85.220.54]) by kanga.kvack.org (Postfix) with ESMTP id 0D6C96B0036 for ; Tue, 20 May 2014 03:53:16 -0400 (EDT) Received: by mail-pa0-f54.google.com with SMTP id bj1so85977pad.13 for ; Tue, 20 May 2014 00:53:16 -0700 (PDT) Received: from e28smtp09.in.ibm.com (e28smtp09.in.ibm.com. [122.248.162.9]) by mx.google.com with ESMTPS id nj1si616591pbc.95.2014.05.20.00.53.14 for (version=TLSv1 cipher=RC4-SHA bits=128/128); Tue, 20 May 2014 00:53:16 -0700 (PDT) Received: from /spool/local by e28smtp09.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Tue, 20 May 2014 13:23:12 +0530 Received: from d28relay01.in.ibm.com (d28relay01.in.ibm.com [9.184.220.58]) by d28dlp03.in.ibm.com (Postfix) with ESMTP id 7EAF6125805A for ; Tue, 20 May 2014 13:22:14 +0530 (IST) Received: from d28av03.in.ibm.com (d28av03.in.ibm.com [9.184.220.65]) by d28relay01.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4K7rV4N41353438 for ; Tue, 20 May 2014 13:23:31 +0530 Received: from d28av03.in.ibm.com (localhost [127.0.0.1]) by d28av03.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4K7r6O1005316 for ; Tue, 20 May 2014 13:23:06 +0530 Message-ID: <537B09DF.1090906@linux.vnet.ibm.com> Date: Tue, 20 May 2014 13:23:03 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520003201.a2360d5d.akpm@linux-foundation.org> In-Reply-To: <20140520003201.a2360d5d.akpm@linux-foundation.org> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Andrew Morton , Rusty Russell Cc: "Kirill A. Shutemov" , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tuesday 20 May 2014 01:02 PM, Andrew Morton wrote: > On Tue, 20 May 2014 15:52:07 +0930 Rusty Russell wrote: > >> "Kirill A. 
Shutemov" writes: >>> Andrew Morton wrote: >>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>> >>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>> >>>> Yes. And shame on me for missing it (this time!) at review. >>>> >>>> There's still time to fix this. Patches, please. >>> >>> Here it is. Made at 3.30 AM, build tested only. >> >> Prefer on top of Maddy's patch which makes it always a variable, rather >> than CONFIG_DEBUG_FS. It's got enough hair as it is. >> > > We're at 3.15-rc5 and this interface should be finalised for 3.16. So > Kirrill's patch is pretty urgent and should come first. > > Well. It's only a debugfs interface at this stage so we are allowed to > change it later, but it's better not to. > My patchset does not change the interface, but uses the current fault around order variable from CONFIG_DEBUG_FS block to allow changes at runtime, instead of having a constant and some cleanup. Thanks for review Regards --Maddy -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f46.google.com (mail-pa0-f46.google.com [209.85.220.46]) by kanga.kvack.org (Postfix) with ESMTP id E1BF96B0036 for ; Tue, 20 May 2014 03:17:10 -0400 (EDT) Received: by mail-pa0-f46.google.com with SMTP id kq14so55377pab.33 for ; Tue, 20 May 2014 00:17:10 -0700 (PDT) Received: from ozlabs.org (ozlabs.org. 
[2401:3900:2:1::2]) by mx.google.com with ESMTPS id kv4si9762027pab.78.2014.05.20.00.17.09 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 20 May 2014 00:17:09 -0700 (PDT) From: Rusty Russell Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <20140520004429.E660AE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> Date: Tue, 20 May 2014 15:52:07 +0930 Message-ID: <87oaythsvk.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Sender: owner-linux-mm@kvack.org List-ID: To: "Kirill A. Shutemov" , Andrew Morton Cc: Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com "Kirill A. Shutemov" writes: > Andrew Morton wrote: >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >> >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >> > the order of the fault-around size in bytes, and fault_around_pages() >> > use 1UL << (fault_around_order - PAGE_SHIFT) >> >> Yes. And shame on me for missing it (this time!) at review. >> >> There's still time to fix this. Patches, please. > > Here it is. Made at 3.30 AM, build tested only. Prefer on top of Maddy's patch which makes it always a variable, rather than CONFIG_DEBUG_FS. It's got enough hair as it is. Cheers, Rusty. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f47.google.com (mail-pa0-f47.google.com [209.85.220.47]) by kanga.kvack.org (Postfix) with ESMTP id 1DBFB6B0036 for ; Tue, 20 May 2014 04:04:08 -0400 (EDT) Received: by mail-pa0-f47.google.com with SMTP id lf10so90507pab.34 for ; Tue, 20 May 2014 01:04:07 -0700 (PDT) Received: from e28smtp03.in.ibm.com (e28smtp03.in.ibm.com. [122.248.162.3]) by mx.google.com with ESMTPS id bu1si653109pbc.136.2014.05.20.01.04.06 for (version=TLSv1 cipher=RC4-SHA bits=128/128); Tue, 20 May 2014 01:04:07 -0700 (PDT) Received: from /spool/local by e28smtp03.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Tue, 20 May 2014 13:34:04 +0530 Received: from d28relay02.in.ibm.com (d28relay02.in.ibm.com [9.184.220.59]) by d28dlp03.in.ibm.com (Postfix) with ESMTP id 666FA1258054 for ; Tue, 20 May 2014 13:33:08 +0530 (IST) Received: from d28av03.in.ibm.com (d28av03.in.ibm.com [9.184.220.65]) by d28relay02.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4K84P5v55509138 for ; Tue, 20 May 2014 13:34:25 +0530 Received: from d28av03.in.ibm.com (localhost [127.0.0.1]) by d28av03.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4K840n6023115 for ; Tue, 20 May 2014 13:34:01 +0530 Message-ID: <537B0C6E.4030501@linux.vnet.ibm.com> Date: Tue, 20 May 2014 13:33:58 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 2/2] powerpc/pseries: init fault_around_order for pseries References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <1399541296-18810-3-git-send-email-maddy@linux.vnet.ibm.com> <20140520002834.aefb5a90.akpm@linux-foundation.org> In-Reply-To: <20140520002834.aefb5a90.akpm@linux-foundation.org> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: Andrew Morton Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, 
linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, kirill.shutemov@linux.intel.com, rusty@rustcorp.com.au, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tuesday 20 May 2014 12:58 PM, Andrew Morton wrote: > On Thu, 8 May 2014 14:58:16 +0530 Madhavan Srinivasan wrote: > >> --- a/arch/powerpc/platforms/pseries/pseries.h >> +++ b/arch/powerpc/platforms/pseries/pseries.h >> @@ -17,6 +17,8 @@ struct device_node; >> extern void request_event_sources_irqs(struct device_node *np, >> irq_handler_t handler, const char *name); >> >> +extern unsigned int fault_around_order; > > This isn't an appropriate header file for exporting something from core > mm - what happens if arch/mn10300 wants it? > > I guess include/linux/mm.h is the place. > Rusty already suggested this. My bad. The reason for adding it here was that I did the performance test for this platform. Will change and send it out. Thanks for review Regards Maddy -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f179.google.com (mail-pd0-f179.google.com [209.85.192.179]) by kanga.kvack.org (Postfix) with ESMTP id 856266B0036 for ; Tue, 20 May 2014 06:27:46 -0400 (EDT) Received: by mail-pd0-f179.google.com with SMTP id x10so197998pdj.24 for ; Tue, 20 May 2014 03:27:46 -0700 (PDT) Received: from mga11.intel.com (mga11.intel.com. [192.55.52.93]) by mx.google.com with ESMTP id rl10si1150487pbc.161.2014.05.20.03.27.45 for ; Tue, 20 May 2014 03:27:45 -0700 (PDT) From: "Kirill A. 
Shutemov" In-Reply-To: <87oaythsvk.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Content-Transfer-Encoding: 7bit Message-Id: <20140520102738.7F096E009B@blue.fi.intel.com> Date: Tue, 20 May 2014 13:27:38 +0300 (EEST) Sender: owner-linux-mm@kvack.org List-ID: To: Rusty Russell Cc: "Kirill A. Shutemov" , Andrew Morton , Hugh Dickins , Madhavan Srinivasan "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Rusty Russell wrote: > "Kirill A. Shutemov" writes: > > Andrew Morton wrote: > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >> > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >> > the order of the fault-around size in bytes, and fault_around_pages() > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > >> > >> Yes. And shame on me for missing it (this time!) at review. > >> > >> There's still time to fix this. Patches, please. > > > > Here it is. Made at 3.30 AM, build tested only. > > Prefer on top of Maddy's patch which makes it always a variable, rather > than CONFIG_DEBUG_FS. It's got enough hair as it is. Something like this? From: "Kirill A. Shutemov" Date: Tue, 20 May 2014 13:02:03 +0300 Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order There are evidences that faultaround feature is less relevant on architectures with page size bigger then 4k. 
Which makes sense since page fault overhead per byte of mapped area should be less there. Let's rework the feature to specify faultaround area in bytes instead of page order. It's 64 kilobytes for now. The patch effectively disables faultaround on architectures with page size >= 64k (like ppc64). It's possible that some other size of faultaround area is relevant for a platform. We can expose `fault_around_bytes' variable to arch-specific code once such platforms will be found. Signed-off-by: Kirill A. Shutemov --- mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 037b812a9531..252b319e8cdf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +static unsigned long fault_around_bytes = 65536; + +static inline unsigned long fault_around_pages(void) +{ + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; +} + +static inline unsigned long fault_around_mask(void) +{ + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; +} -#ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; -static int fault_around_order_get(void *data, u64 *val) +#ifdef CONFIG_DEBUG_FS +static int fault_around_bytes_get(void *data, u64 *val) { - *val = fault_around_order; + *val = fault_around_bytes; return 0; } -static int fault_around_order_set(void *data, u64 val) +static int fault_around_bytes_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); - if (1UL << val > PTRS_PER_PTE) + if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; - fault_around_order = val; + fault_around_bytes = val; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, - fault_around_order_get, fault_around_order_set, "%llu\n"); 
+DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, - &fault_around_order_fops); + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, + &fault_around_bytes_fops); if (!ret) - pr_warn("Failed to create fault_around_order in debugfs"); + pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } late_initcall(fault_around_debugfs); - -static inline unsigned long fault_around_pages(void) -{ - return 1UL << fault_around_order; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); -} -#else -static inline unsigned long fault_around_pages(void) -{ - unsigned long nr_pages; - - nr_pages = 1UL << FAULT_AROUND_ORDER; - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); - return nr_pages; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); -} #endif static void do_fault_around(struct vm_area_struct *vma, unsigned long address, @@ -3515,7 +3499,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- Kirill A. Shutemov -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pd0-f172.google.com (mail-pd0-f172.google.com [209.85.192.172]) by kanga.kvack.org (Postfix) with ESMTP id 992C76B0035 for ; Tue, 20 May 2014 15:59:58 -0400 (EDT) Received: by mail-pd0-f172.google.com with SMTP id x10so625228pdj.17 for ; Tue, 20 May 2014 12:59:58 -0700 (PDT) Received: from mail.linuxfoundation.org (mail.linuxfoundation.org. [140.211.169.12]) by mx.google.com with ESMTP id eb4si3182938pbb.113.2014.05.20.12.59.57 for ; Tue, 20 May 2014 12:59:57 -0700 (PDT) Date: Tue, 20 May 2014 12:59:56 -0700 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> In-Reply-To: <20140520102738.7F096E009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: "Kirill A. Shutemov" Cc: Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tue, 20 May 2014 13:27:38 +0300 (EEST) "Kirill A. Shutemov" wrote: > Rusty Russell wrote: > > "Kirill A. 
Shutemov" writes: > > > Andrew Morton wrote: > > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > > >> > > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > > >> > the order of the fault-around size in bytes, and fault_around_pages() > > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > > >> > > >> Yes. And shame on me for missing it (this time!) at review. > > >> > > >> There's still time to fix this. Patches, please. > > > > > > Here it is. Made at 3.30 AM, build tested only. > > > > Prefer on top of Maddy's patch which makes it always a variable, rather > > than CONFIG_DEBUG_FS. It's got enough hair as it is. > > Something like this? This appears to be against mainline, not against Madhavan's patch. As mentioned previously, I'd prefer it that way but confused. > From: "Kirill A. Shutemov" > Date: Tue, 20 May 2014 13:02:03 +0300 > Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order > > There are evidences that faultaround feature is less relevant on > architectures with page size bigger then 4k. Which makes sense since > page fault overhead per byte of mapped area should be less there. > > Let's rework the feature to specify faultaround area in bytes instead of > page order. It's 64 kilobytes for now. > > The patch effectively disables faultaround on architectures with > page size >= 64k (like ppc64). > > It's possible that some other size of faultaround area is relevant for a > platform. We can expose `fault_around_bytes' variable to arch-specific > code once such platforms will be found. > > Signed-off-by: Kirill A. 
Shutemov > --- > mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- > 1 file changed, 23 insertions(+), 39 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 037b812a9531..252b319e8cdf 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > update_mmu_cache(vma, address, pte); > } > > -#define FAULT_AROUND_ORDER 4 > +static unsigned long fault_around_bytes = 65536; > + > +static inline unsigned long fault_around_pages(void) > +{ > + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; > +} I think we should round up, not down. So if the user asks for 1kb, they get one page. So this becomes return PAGE_ALIGN(fault_around_bytes) / PAGE_SIZE; > +static inline unsigned long fault_around_mask(void) > +{ > + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; > +} And this has me a bit stumped. It's not helpful that do_fault_around() is undocumented. Does it fault in N/2 pages ahead and N/2 pages behind? Or does it align the address down to the highest multiple of fault_around_bytes? It appears to be the latter, so the location of the faultaround window around the fault address is basically random, depending on what address userspace happened to pick. I don't know why we did this :( Or something. Can we please get some code commentary over do_fault_around() describing this design decision and explaining the reasoning behind it? Also, "neast" is not a word. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pa0-f46.google.com (mail-pa0-f46.google.com [209.85.220.46]) by kanga.kvack.org (Postfix) with ESMTP id E010C6B0036 for ; Wed, 21 May 2014 16:34:10 -0400 (EDT) Received: by mail-pa0-f46.google.com with SMTP id kq14so1738758pab.19 for ; Wed, 21 May 2014 13:34:10 -0700 (PDT) Received: from mail.linuxfoundation.org (mail.linuxfoundation.org. [140.211.169.12]) by mx.google.com with ESMTP id df3si7685330pbb.203.2014.05.21.13.34.09 for ; Wed, 21 May 2014 13:34:10 -0700 (PDT) Date: Wed, 21 May 2014 13:34:08 -0700 From: Andrew Morton Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Message-Id: <20140521133408.4d2f1a551e9652fb0e12265f@linux-foundation.org> In-Reply-To: <20140521134027.263DDE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <20140520125956.aa61a3bfd84d4d6190740ce2@linux-foundation.org> <20140521134027.263DDE009B@blue.fi.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: "Kirill A. Shutemov" Cc: Rusty Russell , Hugh Dickins , Madhavan Srinivasan , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Wed, 21 May 2014 16:40:27 +0300 (EEST) "Kirill A. Shutemov" wrote: > > Or something. 
Can we please get some code commentary over > > do_fault_around() describing this design decision and explaining the > > reasoning behind it? > > I'll do this. But if do_fault_around() rework is needed, I want to do that > first. This sort of thing should be at least partially driven by observation and I don't have the data for that. My seat of the pants feel is that after the first fault, accesses at higher addresses are more common/probable than accesses at lower addresses. In which case we should see improvements by centering the window at some higher address than the fault. Much instrumentation and downstream analysis is needed and the returns will be pretty small! But we don't need to do all that right now. Let's get the current implementation wrapped up for 3.15: get the interface finalized (bytes, not pages!) and get the current design decisions appropriately documented. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pb0-f53.google.com (mail-pb0-f53.google.com [209.85.160.53]) by kanga.kvack.org (Postfix) with ESMTP id C630F6B005A for ; Tue, 27 May 2014 06:44:32 -0400 (EDT) Received: by mail-pb0-f53.google.com with SMTP id md12so8972235pbc.26 for ; Tue, 27 May 2014 03:44:32 -0700 (PDT) Received: from e28smtp06.in.ibm.com (e28smtp06.in.ibm.com. [122.248.162.6]) by mx.google.com with ESMTPS id sl2si18091384pbc.221.2014.05.27.03.44.29 for (version=TLSv1 cipher=RC4-SHA bits=128/128); Tue, 27 May 2014 03:44:31 -0700 (PDT) Received: from /spool/local by e28smtp06.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! 
Violators will be prosecuted for from ; Tue, 27 May 2014 16:14:26 +0530 Received: from d28relay03.in.ibm.com (d28relay03.in.ibm.com [9.184.220.60]) by d28dlp02.in.ibm.com (Postfix) with ESMTP id 575103940058 for ; Tue, 27 May 2014 16:14:24 +0530 (IST) Received: from d28av04.in.ibm.com (d28av04.in.ibm.com [9.184.220.66]) by d28relay03.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id s4RAiuQu1114508 for ; Tue, 27 May 2014 16:14:56 +0530 Received: from d28av04.in.ibm.com (localhost [127.0.0.1]) by d28av04.in.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP id s4RAi6on011179 for ; Tue, 27 May 2014 16:14:08 +0530 Message-ID: <53846C75.10507@linux.vnet.ibm.com> Date: Tue, 27 May 2014 16:14:05 +0530 From: Madhavan Srinivasan MIME-Version: 1.0 Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> <20140520102738.7F096E009B@blue.fi.intel.com> <53842FB1.7090909@linux.vnet.ibm.com> <20140527102200.012BBE009B@blue.fi.intel.com> In-Reply-To: <20140527102200.012BBE009B@blue.fi.intel.com> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: owner-linux-mm@kvack.org List-ID: To: "Kirill A. Shutemov" Cc: Rusty Russell , Andrew Morton , Hugh Dickins , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com On Tuesday 27 May 2014 03:51 PM, Kirill A. Shutemov wrote: > Madhavan Srinivasan wrote: >> On Tuesday 20 May 2014 03:57 PM, Kirill A. 
Shutemov wrote: >>> Rusty Russell wrote: >>>> "Kirill A. Shutemov" writes: >>>>> Andrew Morton wrote: >>>>>> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >>>>>> >>>>>>> Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >>>>>>> the order of the fault-around size in bytes, and fault_around_pages() >>>>>>> use 1UL << (fault_around_order - PAGE_SHIFT) >>>>>> >>>>>> Yes. And shame on me for missing it (this time!) at review. >>>>>> >>>>>> There's still time to fix this. Patches, please. >>>>> >>>>> Here it is. Made at 3.30 AM, build tested only. >>>> >>>> Prefer on top of Maddy's patch which makes it always a variable, rather >>>> than CONFIG_DEBUG_FS. It's got enough hair as it is. >>> >>> Something like this? >>> >>> From: "Kirill A. Shutemov" >>> Date: Tue, 20 May 2014 13:02:03 +0300 >>> Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order >>> >>> There are evidences that faultaround feature is less relevant on >>> architectures with page size bigger then 4k. Which makes sense since >>> page fault overhead per byte of mapped area should be less there. >>> >>> Let's rework the feature to specify faultaround area in bytes instead of >>> page order. It's 64 kilobytes for now. >>> >>> The patch effectively disables faultaround on architectures with >>> page size >= 64k (like ppc64). >>> >>> It's possible that some other size of faultaround area is relevant for a >>> platform. We can expose `fault_around_bytes' variable to arch-specific >>> code once such platforms will be found. >>> >>> Signed-off-by: Kirill A. 
Shutemov >>> --- >>> mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- >>> 1 file changed, 23 insertions(+), 39 deletions(-) >>> >>> diff --git a/mm/memory.c b/mm/memory.c >>> index 037b812a9531..252b319e8cdf 100644 >>> --- a/mm/memory.c >>> +++ b/mm/memory.c >>> @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, >>> update_mmu_cache(vma, address, pte); >>> } >>> >>> -#define FAULT_AROUND_ORDER 4 >>> +static unsigned long fault_around_bytes = 65536; >>> + >>> +static inline unsigned long fault_around_pages(void) >>> +{ >>> + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; >>> +} >>> + >>> +static inline unsigned long fault_around_mask(void) >>> +{ >>> + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; >>> +} >>> >>> -#ifdef CONFIG_DEBUG_FS >>> -static unsigned int fault_around_order = FAULT_AROUND_ORDER; >>> >>> -static int fault_around_order_get(void *data, u64 *val) >>> +#ifdef CONFIG_DEBUG_FS >>> +static int fault_around_bytes_get(void *data, u64 *val) >>> { >>> - *val = fault_around_order; >>> + *val = fault_around_bytes; >>> return 0; >>> } >>> >>> -static int fault_around_order_set(void *data, u64 val) >>> +static int fault_around_bytes_set(void *data, u64 val) >>> { >> >> Kindly ignore the question if not relevant. Even though we need root >> access to alter the value, will we be fine with >> negative value?. > ppc > val is u64. or I miss something? > My Bad. What I wanted to check was for all 0xf input and guess we are fine. Sorry about that. Regards Maddy -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . 
Don't email: email@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751917AbaETHRL (ORCPT ); Tue, 20 May 2014 03:17:11 -0400 Received: from ozlabs.org ([103.22.144.67]:42378 "EHLO ozlabs.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750969AbaETHRJ (ORCPT ); Tue, 20 May 2014 03:17:09 -0400 From: Rusty Russell To: "Kirill A. Shutemov" , Andrew Morton Cc: Hugh Dickins , Madhavan Srinivasan , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc In-Reply-To: <20140520004429.E660AE009B@blue.fi.intel.com> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> User-Agent: Notmuch/0.17 (http://notmuchmail.org) Emacs/24.3.1 (x86_64-pc-linux-gnu) Date: Tue, 20 May 2014 15:52:07 +0930 Message-ID: <87oaythsvk.fsf@rustcorp.com.au> MIME-Version: 1.0 Content-Type: text/plain Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org "Kirill A. Shutemov" writes: > Andrew Morton wrote: >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: >> >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be >> > the order of the fault-around size in bytes, and fault_around_pages() >> > use 1UL << (fault_around_order - PAGE_SHIFT) >> >> Yes. And shame on me for missing it (this time!) at review. >> >> There's still time to fix this. Patches, please. 
> > Here it is. Made at 3.30 AM, build tested only. Prefer on top of Maddy's patch which makes it always a variable, rather than CONFIG_DEBUG_FS. It's got enough hair as it is. Cheers, Rusty. From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752846AbaETK1r (ORCPT ); Tue, 20 May 2014 06:27:47 -0400 Received: from mga11.intel.com ([192.55.52.93]:2636 "EHLO mga11.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751678AbaETK1p (ORCPT ); Tue, 20 May 2014 06:27:45 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.98,873,1392192000"; d="scan'208";a="542012738" From: "Kirill A. Shutemov" To: Rusty Russell Cc: "Kirill A. Shutemov" , Andrew Morton , Hugh Dickins , Madhavan Srinivasan , "Kirill A. Shutemov" , linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org, linux-arch@vger.kernel.org, x86@kernel.org, benh@kernel.crashing.org, paulus@samba.org, riel@redhat.com, mgorman@suse.de, ak@linux.intel.com, peterz@infradead.org, mingo@kernel.org, dave.hansen@intel.com In-Reply-To: <87oaythsvk.fsf@rustcorp.com.au> References: <1399541296-18810-1-git-send-email-maddy@linux.vnet.ibm.com> <537479E7.90806@linux.vnet.ibm.com> <87wqdik4n5.fsf@rustcorp.com.au> <53797511.1050409@linux.vnet.ibm.com> <20140519164301.eafd3dd288ccb88361ddcfc7@linux-foundation.org> <20140520004429.E660AE009B@blue.fi.intel.com> <87oaythsvk.fsf@rustcorp.com.au> Subject: Re: [PATCH V4 0/2] mm: FAULT_AROUND_ORDER patchset performance data for powerpc Content-Transfer-Encoding: 7bit Message-Id: <20140520102738.7F096E009B@blue.fi.intel.com> Date: Tue, 20 May 2014 13:27:38 +0300 (EEST) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Rusty Russell wrote: > "Kirill A. 
Shutemov" writes: > > Andrew Morton wrote: > >> On Mon, 19 May 2014 16:23:07 -0700 (PDT) Hugh Dickins wrote: > >> > >> > Shouldn't FAULT_AROUND_ORDER and fault_around_order be changed to be > >> > the order of the fault-around size in bytes, and fault_around_pages() > >> > use 1UL << (fault_around_order - PAGE_SHIFT) > >> > >> Yes. And shame on me for missing it (this time!) at review. > >> > >> There's still time to fix this. Patches, please. > > > > Here it is. Made at 3.30 AM, build tested only. > > Prefer on top of Maddy's patch which makes it always a variable, rather > than CONFIG_DEBUG_FS. It's got enough hair as it is. Something like this? From: "Kirill A. Shutemov" Date: Tue, 20 May 2014 13:02:03 +0300 Subject: [PATCH] mm: nominate faultaround area in bytes rather then page order There is evidence that the faultaround feature is less relevant on architectures with page size bigger than 4k. This makes sense, since page fault overhead per byte of mapped area should be less there. Let's rework the feature to specify faultaround area in bytes instead of page order. It's 64 kilobytes for now. The patch effectively disables faultaround on architectures with page size >= 64k (like ppc64). It's possible that some other size of faultaround area is relevant for a platform. We can expose `fault_around_bytes' variable to arch-specific code once such platforms are found. Signed-off-by: Kirill A. 
Shutemov --- mm/memory.c | 62 +++++++++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 037b812a9531..252b319e8cdf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3402,63 +3402,47 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -#define FAULT_AROUND_ORDER 4 +static unsigned long fault_around_bytes = 65536; + +static inline unsigned long fault_around_pages(void) +{ + return rounddown_pow_of_two(fault_around_bytes) / PAGE_SIZE; +} + +static inline unsigned long fault_around_mask(void) +{ + return ~(rounddown_pow_of_two(fault_around_bytes) - 1) & PAGE_MASK; +} -#ifdef CONFIG_DEBUG_FS -static unsigned int fault_around_order = FAULT_AROUND_ORDER; -static int fault_around_order_get(void *data, u64 *val) +#ifdef CONFIG_DEBUG_FS +static int fault_around_bytes_get(void *data, u64 *val) { - *val = fault_around_order; + *val = fault_around_bytes; return 0; } -static int fault_around_order_set(void *data, u64 val) +static int fault_around_bytes_set(void *data, u64 val) { - BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE); - if (1UL << val > PTRS_PER_PTE) + if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; - fault_around_order = val; + fault_around_bytes = val; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops, - fault_around_order_get, fault_around_order_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops, + fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { void *ret; - ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL, - &fault_around_order_fops); + ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL, + &fault_around_bytes_fops); if (!ret) - pr_warn("Failed to create fault_around_order in debugfs"); + pr_warn("Failed to create fault_around_bytes in debugfs"); return 0; } 
late_initcall(fault_around_debugfs); - -static inline unsigned long fault_around_pages(void) -{ - return 1UL << fault_around_order; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1); -} -#else -static inline unsigned long fault_around_pages(void) -{ - unsigned long nr_pages; - - nr_pages = 1UL << FAULT_AROUND_ORDER; - BUILD_BUG_ON(nr_pages > PTRS_PER_PTE); - return nr_pages; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1); -} #endif static void do_fault_around(struct vm_area_struct *vma, unsigned long address, @@ -3515,7 +3499,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages) { + if (vma->vm_ops->map_pages && fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- Kirill A. Shutemov