From: "Aneesh Kumar K.V"
To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, Scott Wood
Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH v2 2/3] powerpc: get hugetlbpage handling more generic
Date: Mon, 19 Sep 2016 11:15:44 +0530
Message-Id: <87poo0xx13.fsf@linux.vnet.ibm.com>

Christophe Leroy writes:

> Today there are two implementations of hugetlbpages which are managed
> by exclusive #ifdefs:
> * FSL_BOOKE: several directory entries point to the same single hugepage
> * BOOK3S: one upper level directory entry points to a table of hugepages
>
> In preparation for implementing hugepage support on the 8xx, we need a
> mix of the two above solutions, because the 8xx needs both cases
> depending on the page size:
> * In 4k page size mode, each PGD entry covers a 4M byte area. It means
>   that 2 PGD entries will be necessary to cover an 8M hugepage while a
>   single PGD entry will cover 8x 512k hugepages.
> * In 16k page size mode, each PGD entry covers a 64M byte area. It means
>   that 8x 8M hugepages will be covered by one PGD entry and 64x 512k
>   hugepages will be covered by one PGD entry.
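
Just to make the arithmetic above concrete, here is a stand-alone
sketch (not part of the patch; the helper name is made up). It only
mirrors the shift math that __hugepte_alloc uses below, where pshift
is the hugepage shift and pdshift the shift covered by one directory
entry:

	#include <stdio.h>

	/* How many of the smaller unit fit into the larger one. */
	static unsigned int ratio(unsigned int pshift, unsigned int pdshift)
	{
		return pshift >= pdshift ? 1u << (pshift - pdshift)
					 : 1u << (pdshift - pshift);
	}

	int main(void)
	{
		/* 4k mode: one PGD entry covers 4M (pdshift = 22) */
		printf("8M page:   %u PGD entries per hugepage\n",
		       ratio(23, 22));		/* 2 */
		printf("512k page: %u hugepages per PGD entry\n",
		       ratio(19, 22));		/* 8 */
		/* 16k mode: one PGD entry covers 64M (pdshift = 26) */
		printf("8M page:   %u hugepages per PGD entry\n",
		       ratio(23, 26));		/* 8 */
		printf("512k page: %u hugepages per PGD entry\n",
		       ratio(19, 26));		/* 64 */
		return 0;
	}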
>
> This patch:
> * removes #ifdefs in favor of if/else based on the range sizes
> * merges the two huge_pte_alloc() functions as they are pretty similar
> * merges the two hugetlbpage_init() functions as they are pretty similar
>
> Signed-off-by: Christophe Leroy
> ---
> v2: This part is new and results from splitting the last patch of the
> v1 series in two parts
>
>  arch/powerpc/mm/hugetlbpage.c | 189 +++++++++++++++++-------------------
>  1 file changed, 77 insertions(+), 112 deletions(-)
>
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 8a512b1..2119f00 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -64,14 +64,16 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
>  {
>  	struct kmem_cache *cachep;
>  	pte_t *new;
> -
> -#ifdef CONFIG_PPC_FSL_BOOK3E
>  	int i;
> -	int num_hugepd = 1 << (pshift - pdshift);
> -	cachep = hugepte_cache;
> -#else
> -	cachep = PGT_CACHE(pdshift - pshift);
> -#endif
> +	int num_hugepd;
> +
> +	if (pshift >= pdshift) {
> +		cachep = hugepte_cache;
> +		num_hugepd = 1 << (pshift - pdshift);
> +	} else {
> +		cachep = PGT_CACHE(pdshift - pshift);
> +		num_hugepd = 1;
> +	}

Is there a way to hint the likely/unlikely branch based on the page
size selected at build time?
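
For illustration, one way to get a build-time hint without bringing the
#ifdef back, assuming (as the old #ifdef effectively did) that
FSL_BOOK3E always has pshift >= pdshift. Untested sketch:

	/*
	 * IS_ENABLED() folds to a compile-time constant, so on
	 * FSL_BOOK3E builds the compiler can discard the else branch
	 * entirely (stronger than a likely() hint), while platforms
	 * that need both cases at runtime keep the full test.
	 */
	if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || pshift >= pdshift) {
		cachep = hugepte_cache;
		num_hugepd = 1 << (pshift - pdshift);
	} else {
		cachep = PGT_CACHE(pdshift - pshift);
		num_hugepd = 1;
	}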
>
>  	new = kmem_cache_zalloc(cachep, GFP_KERNEL);
>
> @@ -89,7 +91,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
>  	smp_wmb();
>
>  	spin_lock(&mm->page_table_lock);
> -#ifdef CONFIG_PPC_FSL_BOOK3E
> +
>  	/*
>  	 * We have multiple higher-level entries that point to the same
>  	 * actual pte location. Fill in each as we go and backtrack on error.
> @@ -100,8 +102,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
>  	if (unlikely(!hugepd_none(*hpdp)))
>  		break;
>  	else

....

> -#ifdef CONFIG_PPC_FSL_BOOK3E
>  struct kmem_cache *hugepte_cache;
>  static int __init hugetlbpage_init(void)
>  {
>  	int psize;
>
> -	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
> -		unsigned shift;
> -
> -		if (!mmu_psize_defs[psize].shift)
> -			continue;
> -
> -		shift = mmu_psize_to_shift(psize);
> -
> -		/* Don't treat normal page sizes as huge... */
> -		if (shift != PAGE_SHIFT)
> -			if (add_huge_page_size(1ULL << shift) < 0)
> -				continue;
> -	}
> -
> -	/*
> -	 * Create a kmem cache for hugeptes. The bottom bits in the pte have
> -	 * size information encoded in them, so align them to allow this
> -	 */
> -	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
> -					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
> -	if (hugepte_cache == NULL)
> -		panic("%s: Unable to create kmem cache for hugeptes\n",
> -		      __func__);
> -
> -	/* Default hpage size = 4M */
> -	if (mmu_psize_defs[MMU_PAGE_4M].shift)
> -		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
> -	else
> -		panic("%s: Unable to set default huge page size\n", __func__);
> -
> -
> -	return 0;
> -}
> -#else
> -static int __init hugetlbpage_init(void)
> -{
> -	int psize;
> -
> +#if !defined(CONFIG_PPC_FSL_BOOK3E)
>  	if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
>  		return -ENODEV;
> -
> +#endif

Do we need that #if? radix_enabled() should evaluate to 0 there, so the
compiler should drop the whole condition at compile time, shouldn't it?
Or did you run into errors with that?

>  	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
>  		unsigned shift;
>  		unsigned pdshift;
> @@ -860,16 +807,31 @@ static int __init hugetlbpage_init(void)
>  		 * if we have pdshift and shift value same, we don't
>  		 * use pgt cache for hugepd.
>  		 */
> -		if (pdshift != shift) {
> +		if (pdshift > shift) {
>  			pgtable_cache_add(pdshift - shift, NULL);
>  			if (!PGT_CACHE(pdshift - shift))
>  				panic("hugetlbpage_init(): could not create "
>  				      "pgtable cache for %d bit pagesize\n", shift);
> +		} else if (!hugepte_cache) {
> +			/*
> +			 * Create a kmem cache for hugeptes. The bottom bits in
> +			 * the pte have size information encoded in them, so
> +			 * align them to allow this
> +			 */
> +			hugepte_cache = kmem_cache_create("hugepte-cache",
> +							  sizeof(pte_t),
> +							  HUGEPD_SHIFT_MASK + 1,
> +							  0, NULL);
> +			if (hugepte_cache == NULL)
> +				panic("%s: Unable to create kmem cache "
> +				      "for hugeptes\n", __func__);
> +

We don't need hugepte_cache for book3s 64K. I guess we will end up
creating one here?

>  		}
>  	}
>
>  	/* Set default large page size. Currently, we pick 16M or 1M
>  	 * depending on what is available
> +	 * We select 4M on other ones.
>  	 */
>  	if (mmu_psize_defs[MMU_PAGE_16M].shift)
>  		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
> @@ -877,11 +839,14 @@ static int __init hugetlbpage_init(void)
>  		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
>  	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
>  		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
> -
> +	else if (mmu_psize_defs[MMU_PAGE_4M].shift)
> +		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
> +	else
> +		panic("%s: Unable to set default huge page size\n", __func__);
>
>  	return 0;
>  }
> -#endif
> +
>  arch_initcall(hugetlbpage_init);
>
>  void flush_dcache_icache_hugepage(struct page *page)
> --
> 2.1.0
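
Coming back to the hugepte_cache point above: one way to avoid creating
an unused cache on book3s could be to restrict the else branch to the
platforms that actually allocate from hugepte_cache. Untested sketch,
anticipating the 8xx user this series prepares for:

	} else if ((IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) ||
		    IS_ENABLED(CONFIG_PPC_8xx)) && !hugepte_cache) {
		/* Only FSL_BOOK3E (and soon the 8xx) allocate from
		 * hugepte_cache, so book3s 64K no longer instantiates
		 * a cache it never uses. */
		hugepte_cache = kmem_cache_create("hugepte-cache",
						  sizeof(pte_t),
						  HUGEPD_SHIFT_MASK + 1,
						  0, NULL);
		if (hugepte_cache == NULL)
			panic("%s: Unable to create kmem cache "
			      "for hugeptes\n", __func__);
	}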