All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: torvalds@linux-foundation.org, corbet@lwn.net, will@kernel.org,
	boqun.feng@gmail.com, mark.rutland@arm.com,
	catalin.marinas@arm.com, dennis@kernel.org, tj@kernel.org,
	cl@linux.com, hca@linux.ibm.com, gor@linux.ibm.com,
	agordeev@linux.ibm.com, borntraeger@linux.ibm.com,
	svens@linux.ibm.com, tglx@linutronix.de, mingo@redhat.com,
	bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
	hpa@zytor.com, joro@8bytes.org, suravee.suthikulpanit@amd.com,
	robin.murphy@arm.com, dwmw2@infradead.org,
	baolu.lu@linux.intel.com, Arnd Bergmann <arnd@arndb.de>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	davem@davemloft.net, penberg@kernel.org, rientjes@google.com,
	iamjoonsoo.kim@lge.com, Andrew Morton <akpm@linux-foundation.org>,
	vbabka@suse.cz, roman.gushchin@linux.dev,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, linux-s390@vger.kernel.org,
	iommu@lists.linux.dev, linux-arch@vger.kernel.org,
	linux-crypto@vger.kernel.org
Subject: Re: [PATCH v2 08/10] slub: Replace cmpxchg_double()
Date: Wed, 8 Feb 2023 13:31:39 +0000	[thread overview]
Message-ID: <Y+OkOxpOnRYcI3DS@localhost> (raw)
In-Reply-To: <20230202152655.684926740@infradead.org>

On Thu, Feb 02, 2023 at 03:50:38PM +0100, Peter Zijlstra wrote:
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  include/linux/slub_def.h |   12 ++-
>  mm/slab.h                |   45 +++++++++++++-
>  mm/slub.c                |  142 ++++++++++++++++++++++++++++-------------------
>  3 files changed, 135 insertions(+), 64 deletions(-)
> 
> --- a/include/linux/slub_def.h
> +++ b/include/linux/slub_def.h
> @@ -39,7 +39,8 @@ enum stat_item {
>  	CPU_PARTIAL_FREE,	/* Refill cpu partial on free */
>  	CPU_PARTIAL_NODE,	/* Refill cpu partial from node partial */
>  	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
> -	NR_SLUB_STAT_ITEMS };
> +	NR_SLUB_STAT_ITEMS
> +};
>  
>  #ifndef CONFIG_SLUB_TINY
>  /*
> @@ -47,8 +48,13 @@ enum stat_item {
>   * with this_cpu_cmpxchg_double() alignment requirements.
>   */
>  struct kmem_cache_cpu {
> -	void **freelist;	/* Pointer to next available object */
> -	unsigned long tid;	/* Globally unique transaction id */
> +	union {
> +		struct {
> +			void **freelist;	/* Pointer to next available object */
> +			unsigned long tid;	/* Globally unique transaction id */
> +		};
> +		freelist_aba_t freelist_tid;
> +	};
>  	struct slab *slab;	/* The slab from which we are allocating */
>  #ifdef CONFIG_SLUB_CPU_PARTIAL
>  	struct slab *partial;	/* Partially allocated frozen slabs */
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -5,6 +5,34 @@
>   * Internal slab definitions
>   */
>  
> +/*
> + * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
> + * problems with cmpxchg of just a pointer.
> + */
> +typedef union {
> +	struct {
> +		void *freelist;
> +		unsigned long counter;
> +	};
> +#ifdef CONFIG_64BIT
> +	u128 full;
> +#else
> +	u64 full;
> +#endif
> +} freelist_aba_t;
> +
> +#ifdef CONFIG_64BIT
> +# ifdef system_has_cmpxchg128
> +# define system_has_freelist_aba()	system_has_cmpxchg128()
> +# define try_cmpxchg_freelist		try_cmpxchg128
> +# endif
> +#else /* CONFIG_64BIT */
> +# ifdef system_has_cmpxchg64
> +# define system_has_freelist_aba()	system_has_cmpxchg64()
> +# define try_cmpxchg_freelist		try_cmpxchg64
> +# endif
> +#endif /* CONFIG_64BIT */
> +
>  /* Reuses the bits in struct page */
>  struct slab {
>  	unsigned long __page_flags;
> @@ -37,14 +65,21 @@ struct slab {
>  #endif
>  			};
>  			/* Double-word boundary */
> -			void *freelist;		/* first free object */
>  			union {
> -				unsigned long counters;
>  				struct {
> -					unsigned inuse:16;
> -					unsigned objects:15;
> -					unsigned frozen:1;
> +					void *freelist;		/* first free object */
> +					union {
> +						unsigned long counters;
> +						struct {
> +							unsigned inuse:16;
> +							unsigned objects:15;
> +							unsigned frozen:1;
> +						};
> +					};
>  				};
> +#ifdef system_has_freelist_aba
> +				freelist_aba_t freelist_counter;
> +#endif
>  			};
>  		};
>  		struct rcu_head rcu_head;
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -292,7 +292,13 @@ static inline bool kmem_cache_has_cpu_pa
>  /* Poison object */
>  #define __OBJECT_POISON		((slab_flags_t __force)0x80000000U)
>  /* Use cmpxchg_double */
> +
> +#if defined(system_has_freelist_aba) && \
> +    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
>  #define __CMPXCHG_DOUBLE	((slab_flags_t __force)0x40000000U)
> +#else
> +#define __CMPXCHG_DOUBLE	((slab_flags_t __force)0U)
> +#endif
>  
>  /*
>   * Tracking user of a slab.
> @@ -512,6 +518,43 @@ static __always_inline void slab_unlock(
>  	__bit_spin_unlock(PG_locked, &page->flags);
>  }
>  
> +static inline bool
> +__update_freelist_fast(struct slab *slab,
> +		      void *freelist_old, unsigned long counters_old,
> +		      void *freelist_new, unsigned long counters_new)
> +{
> +
> +	bool ret = false;
> +
> +#ifdef system_has_freelist_aba
> +	freelist_aba_t old = { .freelist = freelist_old, .counter = counters_old };
> +	freelist_aba_t new = { .freelist = freelist_new, .counter = counters_new };
> +
> +	ret = try_cmpxchg_freelist(&slab->freelist_counter.full, &old.full, new.full);
> +#endif /* system_has_freelist_aba */
> +
> +	return ret;
> +}
> +
> +static inline bool
> +__update_freelist_slow(struct slab *slab,
> +		      void *freelist_old, unsigned long counters_old,
> +		      void *freelist_new, unsigned long counters_new)
> +{
> +	bool ret = false;
> +
> +	slab_lock(slab);
> +	if (slab->freelist == freelist_old &&
> +	    slab->counters == counters_old) {
> +		slab->freelist = freelist_new;
> +		slab->counters = counters_new;
> +		ret = true;
> +	}
> +	slab_unlock(slab);
> +
> +	return ret;
> +}
> +
>  /*
>   * Interrupts must be disabled (for the fallback code to work right), typically
>   * by an _irqsave() lock variant. On PREEMPT_RT the preempt_disable(), which is
> @@ -519,33 +562,25 @@ static __always_inline void slab_unlock(
>   * allocation/ free operation in hardirq context. Therefore nothing can
>   * interrupt the operation.
>   */
> -static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
> +static inline bool __slab_update_freelist(struct kmem_cache *s, struct slab *slab,
>  		void *freelist_old, unsigned long counters_old,
>  		void *freelist_new, unsigned long counters_new,
>  		const char *n)
>  {
> +	bool ret;
> +
>  	if (USE_LOCKLESS_FAST_PATH())
>  		lockdep_assert_irqs_disabled();
> -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
> -    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
> +
>  	if (s->flags & __CMPXCHG_DOUBLE) {
> -		if (cmpxchg_double(&slab->freelist, &slab->counters,
> -				   freelist_old, counters_old,
> -				   freelist_new, counters_new))
> -			return true;
> -	} else
> -#endif
> -	{
> -		slab_lock(slab);
> -		if (slab->freelist == freelist_old &&
> -					slab->counters == counters_old) {
> -			slab->freelist = freelist_new;
> -			slab->counters = counters_new;
> -			slab_unlock(slab);
> -			return true;
> -		}
> -		slab_unlock(slab);
> +		ret = __update_freelist_fast(slab, freelist_old, counters_old,
> +				            freelist_new, counters_new);
> +	} else {
> +		ret = __update_freelist_slow(slab, freelist_old, counters_old,
> +				            freelist_new, counters_new);
>  	}
> +	if (likely(ret))
> +		return true;
>  
>  	cpu_relax();
>  	stat(s, CMPXCHG_DOUBLE_FAIL);
> @@ -557,36 +592,26 @@ static inline bool __cmpxchg_double_slab
>  	return false;
>  }
>  
> -static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
> +static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
>  		void *freelist_old, unsigned long counters_old,
>  		void *freelist_new, unsigned long counters_new,
>  		const char *n)
>  {
> -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
> -    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
> +	bool ret;
> +
>  	if (s->flags & __CMPXCHG_DOUBLE) {
> -		if (cmpxchg_double(&slab->freelist, &slab->counters,
> -				   freelist_old, counters_old,
> -				   freelist_new, counters_new))
> -			return true;
> -	} else
> -#endif
> -	{
> +		ret = __update_freelist_fast(slab, freelist_old, counters_old,
> +				            freelist_new, counters_new);
> +	} else {
>  		unsigned long flags;
>  
>  		local_irq_save(flags);
> -		slab_lock(slab);
> -		if (slab->freelist == freelist_old &&
> -					slab->counters == counters_old) {
> -			slab->freelist = freelist_new;
> -			slab->counters = counters_new;
> -			slab_unlock(slab);
> -			local_irq_restore(flags);
> -			return true;
> -		}
> -		slab_unlock(slab);
> +		ret = __update_freelist_slow(slab, freelist_old, counters_old,
> +				            freelist_new, counters_new);
>  		local_irq_restore(flags);
>  	}
> +	if (likely(ret))
> +		return true;
>  
>  	cpu_relax();
>  	stat(s, CMPXCHG_DOUBLE_FAIL);
> @@ -2229,7 +2254,7 @@ static inline void *acquire_slab(struct
>  	VM_BUG_ON(new.frozen);
>  	new.frozen = 1;
>  
> -	if (!__cmpxchg_double_slab(s, slab,
> +	if (!__slab_update_freelist(s, slab,
>  			freelist, counters,
>  			new.freelist, new.counters,
>  			"acquire_slab"))
> @@ -2555,7 +2580,7 @@ static void deactivate_slab(struct kmem_
>  	}
>  
>  
> -	if (!cmpxchg_double_slab(s, slab,
> +	if (!slab_update_freelist(s, slab,
>  				old.freelist, old.counters,
>  				new.freelist, new.counters,
>  				"unfreezing slab")) {
> @@ -2612,7 +2637,7 @@ static void __unfreeze_partials(struct k
>  
>  			new.frozen = 0;
>  
> -		} while (!__cmpxchg_double_slab(s, slab,
> +		} while (!__slab_update_freelist(s, slab,
>  				old.freelist, old.counters,
>  				new.freelist, new.counters,
>  				"unfreezing slab"));
> @@ -3009,6 +3034,18 @@ static inline bool pfmemalloc_match(stru
>  }
>  
>  #ifndef CONFIG_SLUB_TINY
> +static inline bool
> +__update_cpu_freelist_fast(struct kmem_cache *s,
> +			   void *freelist_old, void *freelist_new,
> +			   unsigned long tid)
> +{
> +	freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
> +	freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
> +
> +	return this_cpu_cmpxchg(s->cpu_slab->freelist_tid.full,
> +				old.full, new.full) == old.full;
> +}
> +
>  /*
>   * Check the slab->freelist and either transfer the freelist to the
>   * per cpu freelist or deactivate the slab.
> @@ -3035,7 +3072,7 @@ static inline void *get_freelist(struct
>  		new.inuse = slab->objects;
>  		new.frozen = freelist != NULL;
>  
> -	} while (!__cmpxchg_double_slab(s, slab,
> +	} while (!__slab_update_freelist(s, slab,
>  		freelist, counters,
>  		NULL, new.counters,
>  		"get_freelist"));
> @@ -3360,11 +3397,7 @@ static __always_inline void *__slab_allo
>  		 * against code executing on this cpu *not* from access by
>  		 * other cpus.
>  		 */
> -		if (unlikely(!this_cpu_cmpxchg_double(
> -				s->cpu_slab->freelist, s->cpu_slab->tid,
> -				object, tid,
> -				next_object, next_tid(tid)))) {
> -
> +		if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) {
>  			note_cmpxchg_failure("slab_alloc", s, tid);
>  			goto redo;
>  		}
> @@ -3632,7 +3665,7 @@ static void __slab_free(struct kmem_cach
>  			}
>  		}
>  
> -	} while (!cmpxchg_double_slab(s, slab,
> +	} while (!slab_update_freelist(s, slab,
>  		prior, counters,
>  		head, new.counters,
>  		"__slab_free"));
> @@ -3737,11 +3770,7 @@ static __always_inline void do_slab_free
>  
>  		set_freepointer(s, tail_obj, freelist);
>  
> -		if (unlikely(!this_cpu_cmpxchg_double(
> -				s->cpu_slab->freelist, s->cpu_slab->tid,
> -				freelist, tid,
> -				head, next_tid(tid)))) {
> -
> +		if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
>  			note_cmpxchg_failure("slab_free", s, tid);
>  			goto redo;
>  		}
> @@ -4505,11 +4534,12 @@ static int kmem_cache_open(struct kmem_c
>  		}
>  	}
>  
> -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
> +#if defined(system_has_freelist_aba) && \
>      defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
> -	if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
> +	if (system_has_freelist_aba() && !(s->flags & SLAB_NO_CMPXCHG)) {
>  		/* Enable fast mode */
>  		s->flags |= __CMPXCHG_DOUBLE;
> +	}
>  #endif
>  
>  	/*

Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

Thanks!

  reply	other threads:[~2023-02-08 13:32 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-02 14:50 [PATCH v2 00/10] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Peter Zijlstra
2023-02-02 14:50 ` [PATCH v2 01/10] cyrpto/b128ops: Remove struct u128 Peter Zijlstra
2023-02-02 20:21   ` H. Peter Anvin
2023-02-03  1:06   ` Herbert Xu
2023-02-02 14:50 ` [PATCH v2 02/10] types: Introduce [us]128 Peter Zijlstra
2023-02-03  3:37   ` Herbert Xu
2023-02-02 14:50 ` [PATCH v2 03/10] arch: Introduce arch_{,try_}_cmpxchg128{,_local}() Peter Zijlstra
2023-02-02 17:04   ` Heiko Carstens
2023-02-03 16:52   ` Mark Rutland
2023-02-02 14:50 ` [PATCH v2 04/10] instrumentation: Wire up cmpxchg128() Peter Zijlstra
2023-02-03 16:55   ` Mark Rutland
2023-02-02 14:50 ` [PATCH v2 05/10] percpu: Wire up cmpxchg128 Peter Zijlstra
2023-02-02 17:05   ` Heiko Carstens
2023-02-03 17:02   ` Mark Rutland
2023-02-03 17:25   ` Arnd Bergmann
2023-02-06 11:24     ` Peter Zijlstra
2023-02-06 12:14       ` Peter Zijlstra
2023-02-06 12:48         ` Peter Zijlstra
2023-02-06 13:20           ` Arnd Bergmann
2023-02-06 12:19       ` Arnd Bergmann
2023-02-02 14:50 ` [PATCH v2 06/10] x86,amd_iommu: Replace cmpxchg_double() Peter Zijlstra
2023-02-02 14:50 ` [PATCH v2 07/10] x86,intel_iommu: " Peter Zijlstra
2023-02-03  2:51   ` Baolu Lu
2023-02-02 14:50 ` [PATCH v2 08/10] slub: " Peter Zijlstra
2023-02-08 13:31   ` Hyeonggon Yoo [this message]
2023-02-02 14:50 ` [PATCH v2 09/10] arch: Remove cmpxchg_double Peter Zijlstra
2023-02-08 13:44   ` Hyeonggon Yoo
2023-02-02 14:50 ` [PATCH v2 10/10] s390/cpum_sf: Convert to cmpxchg128() Peter Zijlstra
2023-02-02 17:05   ` Heiko Carstens
2023-02-02 19:39 ` [PATCH v2 00/10] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Linus Torvalds
2023-02-02 22:45   ` David Laight

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y+OkOxpOnRYcI3DS@localhost \
    --to=42.hyeyoo@gmail.com \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=baolu.lu@linux.intel.com \
    --cc=boqun.feng@gmail.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=cl@linux.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=dennis@kernel.org \
    --cc=dwmw2@infradead.org \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=hpa@zytor.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=penberg@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rientjes@google.com \
    --cc=robin.murphy@arm.com \
    --cc=roman.gushchin@linux.dev \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=svens@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.