All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: David Gibson <dwg@au1.ibm.com>
Cc: paulus@samba.org, linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org
Subject: Re: [PATCH -V5 24/25] powerpc: Optimize hugepage invalidate
Date: Sun, 14 Apr 2013 15:32:12 +0530	[thread overview]
Message-ID: <8761zpqvkr.fsf@linux.vnet.ibm.com> (raw)
In-Reply-To: <20130412042104.GH5065@truffula.fritz.box>

David Gibson <dwg@au1.ibm.com> writes:

> On Thu, Apr 04, 2013 at 11:28:02AM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> Hugepage invalidate involves invalidating multiple hpte entries.
>> Optimize the operation using H_BULK_REMOVE on lpar platforms.
>> On native, reduce the number of tlb flush.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/machdep.h    |    3 +
>>  arch/powerpc/mm/hash_native_64.c      |   78 ++++++++++++++++++++
>>  arch/powerpc/mm/pgtable.c             |   13 +++-
>>  arch/powerpc/platforms/pseries/lpar.c |  126 +++++++++++++++++++++++++++++++--
>>  4 files changed, 210 insertions(+), 10 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
>> index 6cee6e0..3bc7816 100644
>> --- a/arch/powerpc/include/asm/machdep.h
>> +++ b/arch/powerpc/include/asm/machdep.h
>> @@ -56,6 +56,9 @@ struct machdep_calls {
>>  	void            (*hpte_removebolted)(unsigned long ea,
>>  					     int psize, int ssize);
>>  	void		(*flush_hash_range)(unsigned long number, int local);
>> +	void		(*hugepage_invalidate)(struct mm_struct *mm,
>> +					       unsigned char *hpte_slot_array,
>> +					       unsigned long addr, int psize);
>>  
>>  	/* special for kexec, to be called in real mode, linear mapping is
>>  	 * destroyed as well */
>> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
>> index ac84fa6..59f29bf 100644
>> --- a/arch/powerpc/mm/hash_native_64.c
>> +++ b/arch/powerpc/mm/hash_native_64.c
>> @@ -450,6 +450,83 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
>>  	local_irq_restore(flags);
>>  }
>>  
>> +static void native_hugepage_invalidate(struct mm_struct *mm,
>> +				       unsigned char *hpte_slot_array,
>> +				       unsigned long addr, int psize)
>> +{
>> +	int ssize = 0, i;
>> +	int lock_tlbie;
>> +	struct hash_pte *hptep;
>> +	int actual_psize = MMU_PAGE_16M;
>> +	unsigned int max_hpte_count, valid;
>> +	unsigned long flags, s_addr = addr;
>> +	unsigned long hpte_v, want_v, shift;
>> +	unsigned long hidx, vpn = 0, vsid, hash, slot;
>> +
>> +	shift = mmu_psize_defs[psize].shift;
>> +	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
>> +
>> +	local_irq_save(flags);
>> +	for (i = 0; i < max_hpte_count; i++) {
>> +		/*
>> +		 * 8 bits per each hpte entries
>> +		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
>> +		 */
>> +		valid = hpte_slot_array[i] & 0x1;
>> +		if (!valid)
>> +			continue;
>> +		hidx =  hpte_slot_array[i]  >> 1;
>> +
>> +		/* get the vpn */
>> +		addr = s_addr + (i * (1ul << shift));
>> +		if (!is_kernel_addr(addr)) {
>> +			ssize = user_segment_size(addr);
>> +			vsid = get_vsid(mm->context.id, addr, ssize);
>> +			WARN_ON(vsid == 0);
>> +		} else {
>> +			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>> +			ssize = mmu_kernel_ssize;
>> +		}
>> +
>> +		vpn = hpt_vpn(addr, vsid, ssize);
>> +		hash = hpt_hash(vpn, shift, ssize);
>> +		if (hidx & _PTEIDX_SECONDARY)
>> +			hash = ~hash;
>> +
>> +		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
>> +		slot += hidx & _PTEIDX_GROUP_IX;
>> +
>> +		hptep = htab_address + slot;
>> +		want_v = hpte_encode_avpn(vpn, psize, ssize);
>> +		native_lock_hpte(hptep);
>> +		hpte_v = hptep->v;
>> +
>> +		/* Even if we miss, we need to invalidate the TLB */
>> +		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
>> +			native_unlock_hpte(hptep);
>> +		else
>> +			/* Invalidate the hpte. NOTE: this also unlocks it */
>> +			hptep->v = 0;
>
> Shouldn't you be clearing the entry from the slot_array once it is
> invalidated in the hash table?

We don't need to do that. We should be fine even if hptes get
invalidated under us. Also, in order to update slot_array I would have
to mark the corresponding hpte busy, so that we can ensure nobody is
looking at the slot array.

>
>> +	}
>> +	/*
>> +	 * Since this is a hugepage, we just need a single tlbie.
>> +	 * use the last vpn.
>> +	 */
>> +	lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
>> +	if (lock_tlbie)
>> +		raw_spin_lock(&native_tlbie_lock);
>> +
>> +	asm volatile("ptesync":::"memory");
>> +	__tlbie(vpn, psize, actual_psize, ssize);
>> +	asm volatile("eieio; tlbsync; ptesync":::"memory");
>> +
>> +	if (lock_tlbie)
>> +		raw_spin_unlock(&native_tlbie_lock);
>> +
>> +	local_irq_restore(flags);
>> +}
>> +
>> +
>>  static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
>>  			int *psize, int *apsize, int *ssize, unsigned long *vpn)
>>  {
>> @@ -678,4 +755,5 @@ void __init hpte_init_native(void)
>>  	ppc_md.hpte_remove	= native_hpte_remove;
>>  	ppc_md.hpte_clear_all	= native_hpte_clear;
>>  	ppc_md.flush_hash_range = native_flush_hash_range;
>> +	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
>>  }
>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index fbff062..386cab8 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -433,6 +433,7 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
>>  {
>>  	int ssize, i;
>>  	unsigned long s_addr;
>> +	int max_hpte_count;
>>  	unsigned int psize, valid;
>>  	unsigned char *hpte_slot_array;
>>  	unsigned long hidx, vpn, vsid, hash, shift, slot;
>> @@ -446,12 +447,18 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
>>  	 * second half of the PMD
>>  	 */
>>  	hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
>> -
>>  	/* get the base page size */
>>  	psize = get_slice_psize(mm, s_addr);
>> -	shift = mmu_psize_defs[psize].shift;
>>  
>> -	for (i = 0; i < HUGE_PAGE_SIZE/(1ul << shift); i++) {
>> +	if (ppc_md.hugepage_invalidate)
>> +		return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
>> +						  s_addr, psize);
>> +	/*
>> +	 * No bluk hpte removal support, invalidate each entry
>> +	 */
>> +	shift = mmu_psize_defs[psize].shift;
>> +	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
>> +	for (i = 0; i < max_hpte_count; i++) {
>>  		/*
>>  		 * 8 bits per each hpte entries
>>  		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
>> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
>> index 3daced3..5fcc621 100644
>> --- a/arch/powerpc/platforms/pseries/lpar.c
>> +++ b/arch/powerpc/platforms/pseries/lpar.c
>> @@ -45,6 +45,13 @@
>>  #include "plpar_wrappers.h"
>>  #include "pseries.h"
>>  
>> +/* Flag bits for H_BULK_REMOVE */
>> +#define HBR_REQUEST	0x4000000000000000UL
>> +#define HBR_RESPONSE	0x8000000000000000UL
>> +#define HBR_END		0xc000000000000000UL
>> +#define HBR_AVPN	0x0200000000000000UL
>> +#define HBR_ANDCOND	0x0100000000000000UL
>> +
>>  
>>  /* in hvCall.S */
>>  EXPORT_SYMBOL(plpar_hcall);
>> @@ -339,6 +346,117 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
>>  	BUG_ON(lpar_rc != H_SUCCESS);
>>  }
>>  
>> +/*
>> + * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
>> + * to make sure that we avoid bouncing the hypervisor tlbie lock.
>> + */
>> +#define PPC64_HUGE_HPTE_BATCH 12
>> +
>> +static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
>> +					     unsigned long *vpn, int count,
>> +					     int psize, int ssize)
>> +{
>> +	unsigned long param[9];
>> +	int i = 0, pix = 0, rc;
>> +	unsigned long flags = 0;
>> +	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
>> +
>> +	if (lock_tlbie)
>> +		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
>> +
>> +	for (i = 0; i < count; i++) {
>> +
>> +		if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
>> +			pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize,
>> +						     ssize, 0);
>> +		} else {
>> +			param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
>> +			param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
>> +			pix += 2;
>> +			if (pix == 8) {
>> +				rc = plpar_hcall9(H_BULK_REMOVE, param,
>> +						  param[0], param[1], param[2],
>> +						  param[3], param[4], param[5],
>> +						  param[6], param[7]);
>> +				BUG_ON(rc != H_SUCCESS);
>> +				pix = 0;
>> +			}
>> +		}
>> +	}
>> +	if (pix) {
>> +		param[pix] = HBR_END;
>> +		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
>> +				  param[2], param[3], param[4], param[5],
>> +				  param[6], param[7]);
>> +		BUG_ON(rc != H_SUCCESS);
>> +	}
>> +
>> +	if (lock_tlbie)
>> +		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
>> +}
>> +
>> +static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
>> +				       unsigned char *hpte_slot_array,
>> +				       unsigned long addr, int psize)
>> +{
>> +	int ssize = 0, i, index = 0;
>> +	unsigned long s_addr = addr;
>> +	unsigned int max_hpte_count, valid;
>> +	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
>> +	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
>
> These are really too big to be allocating on the stack.  You'd be
> better off going direct from the char slot array to the data structure
> for H_BULK_REMOVE, rather than introducing this intermediate
> structure.

The reason I wanted to do that was to make sure I don't lock/unlock
pSeries_lpar_tlbie_lock that frequently, i.e., for every H_BULK_REMOVE.
The total size taken by both arrays is only 192 bytes. Is that big
enough to create trouble?

>
>> +	unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
>> +
>> +	shift = mmu_psize_defs[psize].shift;
>> +	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
>> +
>> +	for (i = 0; i < max_hpte_count; i++) {
>> +		/*
>> +		 * 8 bits per each hpte entries
>> +		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
>> +		 */
>> +		valid = hpte_slot_array[i] & 0x1;
>> +		if (!valid)
>> +			continue;
>> +		hidx =  hpte_slot_array[i]  >> 1;
>> +
>> +		/* get the vpn */
>> +		addr = s_addr + (i * (1ul << shift));
>> +		if (!is_kernel_addr(addr)) {
>> +			ssize = user_segment_size(addr);
>> +			vsid = get_vsid(mm->context.id, addr, ssize);
>> +			WARN_ON(vsid == 0);
>> +		} else {
>> +			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>> +			ssize = mmu_kernel_ssize;
>> +		}
>> +
>> +		vpn = hpt_vpn(addr, vsid, ssize);
>> +		hash = hpt_hash(vpn, shift, ssize);
>> +		if (hidx & _PTEIDX_SECONDARY)
>> +			hash = ~hash;
>> +
>> +		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
>> +		slot += hidx & _PTEIDX_GROUP_IX;
>> +
>> +		slot_array[index] = slot;
>> +		vpn_array[index] = vpn;
>> +		if (index == PPC64_HUGE_HPTE_BATCH - 1) {
>> +			/*
>> +			 * Now do a bluk invalidate
>> +			 */
>> +			__pSeries_lpar_hugepage_invalidate(slot_array,
>> +							   vpn_array,
>> +							   PPC64_HUGE_HPTE_BATCH,
>> +							   psize, ssize);
>> +			index = 0;
>> +		} else
>> +			index++;
>> +	}
>> +	if (index)
>> +		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
>> +						   index, psize, ssize);
>> +}
>> +
>>  static void pSeries_lpar_hpte_removebolted(unsigned long ea,
>>  					   int psize, int ssize)
>>  {
>> @@ -354,13 +472,6 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
>>  	pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
>>  }
>>  
>> -/* Flag bits for H_BULK_REMOVE */
>> -#define HBR_REQUEST	0x4000000000000000UL
>> -#define HBR_RESPONSE	0x8000000000000000UL
>> -#define HBR_END		0xc000000000000000UL
>> -#define HBR_AVPN	0x0200000000000000UL
>> -#define HBR_ANDCOND	0x0100000000000000UL
>> -
>>  /*
>>   * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
>>   * lock.
>> @@ -446,6 +557,7 @@ void __init hpte_init_lpar(void)
>>  	ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
>>  	ppc_md.flush_hash_range	= pSeries_lpar_flush_hash_range;
>>  	ppc_md.hpte_clear_all   = pSeries_lpar_hptab_clear;
>> +	ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
>>  }
>>  
>>  #ifdef CONFIG_PPC_SMLPAR

-aneesh

WARNING: multiple messages have this Message-ID (diff)
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: David Gibson <dwg@au1.ibm.com>
Cc: benh@kernel.crashing.org, paulus@samba.org, linux-mm@kvack.org,
	linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH -V5 24/25] powerpc: Optimize hugepage invalidate
Date: Sun, 14 Apr 2013 15:32:12 +0530	[thread overview]
Message-ID: <8761zpqvkr.fsf@linux.vnet.ibm.com> (raw)
In-Reply-To: <20130412042104.GH5065@truffula.fritz.box>

David Gibson <dwg@au1.ibm.com> writes:

> On Thu, Apr 04, 2013 at 11:28:02AM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> Hugepage invalidate involves invalidating multiple hpte entries.
>> Optimize the operation using H_BULK_REMOVE on lpar platforms.
>> On native, reduce the number of tlb flush.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/machdep.h    |    3 +
>>  arch/powerpc/mm/hash_native_64.c      |   78 ++++++++++++++++++++
>>  arch/powerpc/mm/pgtable.c             |   13 +++-
>>  arch/powerpc/platforms/pseries/lpar.c |  126 +++++++++++++++++++++++++++++++--
>>  4 files changed, 210 insertions(+), 10 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
>> index 6cee6e0..3bc7816 100644
>> --- a/arch/powerpc/include/asm/machdep.h
>> +++ b/arch/powerpc/include/asm/machdep.h
>> @@ -56,6 +56,9 @@ struct machdep_calls {
>>  	void            (*hpte_removebolted)(unsigned long ea,
>>  					     int psize, int ssize);
>>  	void		(*flush_hash_range)(unsigned long number, int local);
>> +	void		(*hugepage_invalidate)(struct mm_struct *mm,
>> +					       unsigned char *hpte_slot_array,
>> +					       unsigned long addr, int psize);
>>  
>>  	/* special for kexec, to be called in real mode, linear mapping is
>>  	 * destroyed as well */
>> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
>> index ac84fa6..59f29bf 100644
>> --- a/arch/powerpc/mm/hash_native_64.c
>> +++ b/arch/powerpc/mm/hash_native_64.c
>> @@ -450,6 +450,83 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
>>  	local_irq_restore(flags);
>>  }
>>  
>> +static void native_hugepage_invalidate(struct mm_struct *mm,
>> +				       unsigned char *hpte_slot_array,
>> +				       unsigned long addr, int psize)
>> +{
>> +	int ssize = 0, i;
>> +	int lock_tlbie;
>> +	struct hash_pte *hptep;
>> +	int actual_psize = MMU_PAGE_16M;
>> +	unsigned int max_hpte_count, valid;
>> +	unsigned long flags, s_addr = addr;
>> +	unsigned long hpte_v, want_v, shift;
>> +	unsigned long hidx, vpn = 0, vsid, hash, slot;
>> +
>> +	shift = mmu_psize_defs[psize].shift;
>> +	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
>> +
>> +	local_irq_save(flags);
>> +	for (i = 0; i < max_hpte_count; i++) {
>> +		/*
>> +		 * 8 bits per each hpte entries
>> +		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
>> +		 */
>> +		valid = hpte_slot_array[i] & 0x1;
>> +		if (!valid)
>> +			continue;
>> +		hidx =  hpte_slot_array[i]  >> 1;
>> +
>> +		/* get the vpn */
>> +		addr = s_addr + (i * (1ul << shift));
>> +		if (!is_kernel_addr(addr)) {
>> +			ssize = user_segment_size(addr);
>> +			vsid = get_vsid(mm->context.id, addr, ssize);
>> +			WARN_ON(vsid == 0);
>> +		} else {
>> +			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>> +			ssize = mmu_kernel_ssize;
>> +		}
>> +
>> +		vpn = hpt_vpn(addr, vsid, ssize);
>> +		hash = hpt_hash(vpn, shift, ssize);
>> +		if (hidx & _PTEIDX_SECONDARY)
>> +			hash = ~hash;
>> +
>> +		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
>> +		slot += hidx & _PTEIDX_GROUP_IX;
>> +
>> +		hptep = htab_address + slot;
>> +		want_v = hpte_encode_avpn(vpn, psize, ssize);
>> +		native_lock_hpte(hptep);
>> +		hpte_v = hptep->v;
>> +
>> +		/* Even if we miss, we need to invalidate the TLB */
>> +		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
>> +			native_unlock_hpte(hptep);
>> +		else
>> +			/* Invalidate the hpte. NOTE: this also unlocks it */
>> +			hptep->v = 0;
>
> Shouldn't you be clearing the entry from the slot_array once it is
> invalidated in the hash table?

We don't need to do that. We should be fine even if hptes get
invalidated under us. Also, in order to update slot_array I would have
to mark the corresponding hpte busy, so that we can ensure nobody is
looking at the slot array.

>
>> +	}
>> +	/*
>> +	 * Since this is a hugepage, we just need a single tlbie.
>> +	 * use the last vpn.
>> +	 */
>> +	lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
>> +	if (lock_tlbie)
>> +		raw_spin_lock(&native_tlbie_lock);
>> +
>> +	asm volatile("ptesync":::"memory");
>> +	__tlbie(vpn, psize, actual_psize, ssize);
>> +	asm volatile("eieio; tlbsync; ptesync":::"memory");
>> +
>> +	if (lock_tlbie)
>> +		raw_spin_unlock(&native_tlbie_lock);
>> +
>> +	local_irq_restore(flags);
>> +}
>> +
>> +
>>  static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
>>  			int *psize, int *apsize, int *ssize, unsigned long *vpn)
>>  {
>> @@ -678,4 +755,5 @@ void __init hpte_init_native(void)
>>  	ppc_md.hpte_remove	= native_hpte_remove;
>>  	ppc_md.hpte_clear_all	= native_hpte_clear;
>>  	ppc_md.flush_hash_range = native_flush_hash_range;
>> +	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
>>  }
>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index fbff062..386cab8 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -433,6 +433,7 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
>>  {
>>  	int ssize, i;
>>  	unsigned long s_addr;
>> +	int max_hpte_count;
>>  	unsigned int psize, valid;
>>  	unsigned char *hpte_slot_array;
>>  	unsigned long hidx, vpn, vsid, hash, shift, slot;
>> @@ -446,12 +447,18 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
>>  	 * second half of the PMD
>>  	 */
>>  	hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
>> -
>>  	/* get the base page size */
>>  	psize = get_slice_psize(mm, s_addr);
>> -	shift = mmu_psize_defs[psize].shift;
>>  
>> -	for (i = 0; i < HUGE_PAGE_SIZE/(1ul << shift); i++) {
>> +	if (ppc_md.hugepage_invalidate)
>> +		return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
>> +						  s_addr, psize);
>> +	/*
>> +	 * No bluk hpte removal support, invalidate each entry
>> +	 */
>> +	shift = mmu_psize_defs[psize].shift;
>> +	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
>> +	for (i = 0; i < max_hpte_count; i++) {
>>  		/*
>>  		 * 8 bits per each hpte entries
>>  		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
>> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
>> index 3daced3..5fcc621 100644
>> --- a/arch/powerpc/platforms/pseries/lpar.c
>> +++ b/arch/powerpc/platforms/pseries/lpar.c
>> @@ -45,6 +45,13 @@
>>  #include "plpar_wrappers.h"
>>  #include "pseries.h"
>>  
>> +/* Flag bits for H_BULK_REMOVE */
>> +#define HBR_REQUEST	0x4000000000000000UL
>> +#define HBR_RESPONSE	0x8000000000000000UL
>> +#define HBR_END		0xc000000000000000UL
>> +#define HBR_AVPN	0x0200000000000000UL
>> +#define HBR_ANDCOND	0x0100000000000000UL
>> +
>>  
>>  /* in hvCall.S */
>>  EXPORT_SYMBOL(plpar_hcall);
>> @@ -339,6 +346,117 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
>>  	BUG_ON(lpar_rc != H_SUCCESS);
>>  }
>>  
>> +/*
>> + * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
>> + * to make sure that we avoid bouncing the hypervisor tlbie lock.
>> + */
>> +#define PPC64_HUGE_HPTE_BATCH 12
>> +
>> +static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
>> +					     unsigned long *vpn, int count,
>> +					     int psize, int ssize)
>> +{
>> +	unsigned long param[9];
>> +	int i = 0, pix = 0, rc;
>> +	unsigned long flags = 0;
>> +	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
>> +
>> +	if (lock_tlbie)
>> +		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
>> +
>> +	for (i = 0; i < count; i++) {
>> +
>> +		if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
>> +			pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize,
>> +						     ssize, 0);
>> +		} else {
>> +			param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
>> +			param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
>> +			pix += 2;
>> +			if (pix == 8) {
>> +				rc = plpar_hcall9(H_BULK_REMOVE, param,
>> +						  param[0], param[1], param[2],
>> +						  param[3], param[4], param[5],
>> +						  param[6], param[7]);
>> +				BUG_ON(rc != H_SUCCESS);
>> +				pix = 0;
>> +			}
>> +		}
>> +	}
>> +	if (pix) {
>> +		param[pix] = HBR_END;
>> +		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
>> +				  param[2], param[3], param[4], param[5],
>> +				  param[6], param[7]);
>> +		BUG_ON(rc != H_SUCCESS);
>> +	}
>> +
>> +	if (lock_tlbie)
>> +		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
>> +}
>> +
>> +static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
>> +				       unsigned char *hpte_slot_array,
>> +				       unsigned long addr, int psize)
>> +{
>> +	int ssize = 0, i, index = 0;
>> +	unsigned long s_addr = addr;
>> +	unsigned int max_hpte_count, valid;
>> +	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
>> +	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
>
> These are really too big to be allocating on the stack.  You'd be
> better off going direct from the char slot array to the data structure
> for H_BULK_REMOVE, rather than introducing this intermediate
> structure.

The reason I wanted to do that was to make sure I don't lock/unlock
pSeries_lpar_tlbie_lock that frequently, i.e., for every H_BULK_REMOVE.
The total size taken by both arrays is only 192 bytes. Is that big
enough to create trouble?

>
>> +	unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
>> +
>> +	shift = mmu_psize_defs[psize].shift;
>> +	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
>> +
>> +	for (i = 0; i < max_hpte_count; i++) {
>> +		/*
>> +		 * 8 bits per each hpte entries
>> +		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
>> +		 */
>> +		valid = hpte_slot_array[i] & 0x1;
>> +		if (!valid)
>> +			continue;
>> +		hidx =  hpte_slot_array[i]  >> 1;
>> +
>> +		/* get the vpn */
>> +		addr = s_addr + (i * (1ul << shift));
>> +		if (!is_kernel_addr(addr)) {
>> +			ssize = user_segment_size(addr);
>> +			vsid = get_vsid(mm->context.id, addr, ssize);
>> +			WARN_ON(vsid == 0);
>> +		} else {
>> +			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>> +			ssize = mmu_kernel_ssize;
>> +		}
>> +
>> +		vpn = hpt_vpn(addr, vsid, ssize);
>> +		hash = hpt_hash(vpn, shift, ssize);
>> +		if (hidx & _PTEIDX_SECONDARY)
>> +			hash = ~hash;
>> +
>> +		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
>> +		slot += hidx & _PTEIDX_GROUP_IX;
>> +
>> +		slot_array[index] = slot;
>> +		vpn_array[index] = vpn;
>> +		if (index == PPC64_HUGE_HPTE_BATCH - 1) {
>> +			/*
>> +			 * Now do a bluk invalidate
>> +			 */
>> +			__pSeries_lpar_hugepage_invalidate(slot_array,
>> +							   vpn_array,
>> +							   PPC64_HUGE_HPTE_BATCH,
>> +							   psize, ssize);
>> +			index = 0;
>> +		} else
>> +			index++;
>> +	}
>> +	if (index)
>> +		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
>> +						   index, psize, ssize);
>> +}
>> +
>>  static void pSeries_lpar_hpte_removebolted(unsigned long ea,
>>  					   int psize, int ssize)
>>  {
>> @@ -354,13 +472,6 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
>>  	pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
>>  }
>>  
>> -/* Flag bits for H_BULK_REMOVE */
>> -#define HBR_REQUEST	0x4000000000000000UL
>> -#define HBR_RESPONSE	0x8000000000000000UL
>> -#define HBR_END		0xc000000000000000UL
>> -#define HBR_AVPN	0x0200000000000000UL
>> -#define HBR_ANDCOND	0x0100000000000000UL
>> -
>>  /*
>>   * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
>>   * lock.
>> @@ -446,6 +557,7 @@ void __init hpte_init_lpar(void)
>>  	ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
>>  	ppc_md.flush_hash_range	= pSeries_lpar_flush_hash_range;
>>  	ppc_md.hpte_clear_all   = pSeries_lpar_hptab_clear;
>> +	ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
>>  }
>>  
>>  #ifdef CONFIG_PPC_SMLPAR

-aneesh

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-04-14 10:02 UTC|newest]

Thread overview: 146+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-04  5:57 [PATCH -V5 00/25] THP support for PPC64 Aneesh Kumar K.V
2013-04-04  5:57 ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 01/25] powerpc: Use signed formatting when printing error Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 02/25] powerpc: Save DAR and DSISR in pt_regs on MCE Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 03/25] powerpc: Don't hard code the size of pte page Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 04/25] powerpc: Reduce the PTE_INDEX_SIZE Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  7:10   ` David Gibson
2013-04-11  7:10     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 05/25] powerpc: Move the pte free routines from common header Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 06/25] powerpc: Reduce PTE table memory wastage Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-10  4:46   ` David Gibson
2013-04-10  4:46     ` David Gibson
2013-04-10  6:29     ` Aneesh Kumar K.V
2013-04-10  6:29       ` Aneesh Kumar K.V
2013-04-10  7:04       ` David Gibson
2013-04-10  7:04         ` David Gibson
2013-04-10  7:53         ` Aneesh Kumar K.V
2013-04-10  7:53           ` Aneesh Kumar K.V
2013-04-10 17:47           ` Aneesh Kumar K.V
2013-04-10 17:47             ` Aneesh Kumar K.V
2013-04-11  1:20             ` David Gibson
2013-04-11  1:20               ` David Gibson
2013-04-11  1:12           ` David Gibson
2013-04-11  1:12             ` David Gibson
2013-04-10  7:14   ` Michael Ellerman
2013-04-10  7:14     ` Michael Ellerman
2013-04-10  7:54     ` Aneesh Kumar K.V
2013-04-10  7:54       ` Aneesh Kumar K.V
2013-04-10  8:52       ` Aneesh Kumar K.V
2013-04-10  8:52         ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 07/25] powerpc: Use encode avpn where we need only avpn values Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 08/25] powerpc: Decode the pte-lp-encoding bits correctly Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-10  7:19   ` David Gibson
2013-04-10  7:19     ` David Gibson
2013-04-10  8:11     ` Aneesh Kumar K.V
2013-04-10  8:11       ` Aneesh Kumar K.V
2013-04-10 17:49       ` Aneesh Kumar K.V
2013-04-10 17:49         ` Aneesh Kumar K.V
2013-04-11  1:28       ` David Gibson
2013-04-11  1:28         ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 09/25] powerpc: Fix hpte_decode to use the correct decoding for page sizes Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  3:20   ` David Gibson
2013-04-11  3:20     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 10/25] powerpc: print both base and actual page size on hash failure Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  3:21   ` David Gibson
2013-04-11  3:21     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 11/25] powerpc: Print page size info during boot Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 12/25] powerpc: Return all the valid pte ecndoing in KVM_PPC_GET_SMMU_INFO ioctl Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  3:24   ` David Gibson
2013-04-11  3:24     ` David Gibson
2013-04-11  5:11     ` Aneesh Kumar K.V
2013-04-11  5:11       ` Aneesh Kumar K.V
2013-04-11  5:57       ` David Gibson
2013-04-11  5:57         ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 13/25] powerpc: Update tlbie/tlbiel as per ISA doc Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  3:30   ` David Gibson
2013-04-11  3:30     ` David Gibson
2013-04-11  5:20     ` Aneesh Kumar K.V
2013-04-11  5:20       ` Aneesh Kumar K.V
2013-04-11  6:16       ` David Gibson
2013-04-11  6:16         ` David Gibson
2013-04-11  6:36         ` Aneesh Kumar K.V
2013-04-11  6:36           ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 14/25] mm/THP: HPAGE_SHIFT is not a #define on some arch Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  3:36   ` David Gibson
2013-04-11  3:36     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 15/25] mm/THP: Add pmd args to pgtable deposit and withdraw APIs Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  3:40   ` David Gibson
2013-04-11  3:40     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 16/25] mm/THP: withdraw the pgtable after pmdp related operations Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 17/25] powerpc/THP: Implement transparent hugepages for ppc64 Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  5:38   ` David Gibson
2013-04-11  5:38     ` David Gibson
2013-04-11  7:40     ` Aneesh Kumar K.V
2013-04-11  7:40       ` Aneesh Kumar K.V
2013-04-12  0:51       ` David Gibson
2013-04-12  0:51         ` David Gibson
2013-04-12  5:06         ` Aneesh Kumar K.V
2013-04-12  5:06           ` Aneesh Kumar K.V
2013-04-12  5:39           ` David Gibson
2013-04-12  5:39             ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 18/25] powerpc/THP: Double the PMD table size for THP Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-11  6:18   ` David Gibson
2013-04-11  6:18     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 19/25] powerpc/THP: Differentiate THP PMD entries from HUGETLB PMD entries Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-10  7:21   ` Michael Ellerman
2013-04-10  7:21     ` Michael Ellerman
2013-04-10 18:26     ` Aneesh Kumar K.V
2013-04-10 18:26       ` Aneesh Kumar K.V
2013-04-12  1:28   ` David Gibson
2013-04-12  1:28     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 20/25] powerpc/THP: Add code to handle HPTE faults for large pages Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-12  4:01   ` David Gibson
2013-04-12  4:01     ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 21/25] powerpc: Handle hugepage in perf callchain Aneesh Kumar K.V
2013-04-04  5:57   ` Aneesh Kumar K.V
2013-04-12  1:34   ` David Gibson
2013-04-12  1:34     ` David Gibson
2013-04-12  5:05     ` Aneesh Kumar K.V
2013-04-12  5:05       ` Aneesh Kumar K.V
2013-04-04  5:58 ` [PATCH -V5 22/25] powerpc/THP: get_user_pages_fast changes Aneesh Kumar K.V
2013-04-04  5:58   ` Aneesh Kumar K.V
2013-04-12  1:41   ` David Gibson
2013-04-12  1:41     ` David Gibson
2013-04-04  5:58 ` [PATCH -V5 23/25] powerpc/THP: Enable THP on PPC64 Aneesh Kumar K.V
2013-04-04  5:58   ` Aneesh Kumar K.V
2013-04-04  5:58 ` [PATCH -V5 24/25] powerpc: Optimize hugepage invalidate Aneesh Kumar K.V
2013-04-04  5:58   ` Aneesh Kumar K.V
2013-04-12  4:21   ` David Gibson
2013-04-12  4:21     ` David Gibson
2013-04-14 10:02     ` Aneesh Kumar K.V [this message]
2013-04-14 10:02       ` Aneesh Kumar K.V
2013-04-15  1:18       ` David Gibson
2013-04-15  1:18         ` David Gibson
2013-04-04  5:58 ` [PATCH -V5 25/25] powerpc: Handle hugepages in kvm Aneesh Kumar K.V
2013-04-04  5:58   ` Aneesh Kumar K.V
2013-04-04  6:00 ` [PATCH -V5 00/25] THP support for PPC64 Simon Jeons
2013-04-04  6:00   ` Simon Jeons
2013-04-04  6:10   ` Aneesh Kumar K.V
2013-04-04  6:10     ` Aneesh Kumar K.V
2013-04-04  6:14 ` Simon Jeons
2013-04-04  6:14   ` Simon Jeons
2013-04-04  8:38   ` Aneesh Kumar K.V
2013-04-04  8:38     ` Aneesh Kumar K.V
2013-04-19  1:55 ` Simon Jeons
2013-04-19  1:55   ` Simon Jeons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8761zpqvkr.fsf@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=dwg@au1.ibm.com \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.