All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yury Norov <yury.norov@gmail.com>
To: Sander Vanheule <sander@svanheule.net>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Valentin Schneider <vschneid@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Marco Elver <elver@google.com>,
	Barry Song <song.bao.hua@hisilicon.com>,
	linux-kernel@vger.kernel.org,
	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Subject: Re: [PATCH v1 1/2] cpumask: Fix invalid uniprocessor mask assumption
Date: Thu, 2 Jun 2022 15:48:16 -0700	[thread overview]
Message-ID: <Ypk+MKSSaSCBnITY@yury-laptop> (raw)
In-Reply-To: <017b97698ba58d33bf45d30317d5a73c5b93d2a0.1654201862.git.sander@svanheule.net>

On Thu, Jun 02, 2022 at 11:04:19PM +0200, Sander Vanheule wrote:
> On uniprocessor builds, any CPU mask is assumed to contain exactly one
> CPU (cpu0). This ignores the existence of empty masks, resulting in
> incorrect behaviour for most of the implemented optimisations.
> 
> Replace the uniprocessor optimisations with implementations that also
> take into account empty masks. Also drop the incorrectly optimised
> for_each_cpu implementations and use the generic implementations in all
> cases.
> 
> Signed-off-by: Sander Vanheule <sander@svanheule.net>
> ---
>  include/linux/cpumask.h | 35 +++++++++++++++--------------------
>  1 file changed, 15 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index fe29ac7cc469..ce8c7b27f6c9 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -117,51 +117,54 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
>  }
>  
>  #if NR_CPUS == 1
> -/* Uniprocessor.  Assume all masks are "1". */
> +/* Uniprocessor. Assume all valid masks are "1" or empty. */
>  static inline unsigned int cpumask_first(const struct cpumask *srcp)
>  {
> -	return 0;
> +	return !(*cpumask_bits(srcp) & 1);
>  }
 
I think, you can just drop this '#if NR_CPUS == 1' part and rely on SMP
versions. find_first_bit() is optimized for short bitmaps, so I expect
no overhead comparing to this.
  
>  static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
>  {
> -	return 0;
> +	return *cpumask_bits(srcp) & 1;
>  }
>  
>  static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
>  					     const struct cpumask *srcp2)
>  {
> -	return 0;
> +	return !(*cpumask_bits(srcp1) & *cpumask_bits(srcp2) & 1);
>  }
>  
>  static inline unsigned int cpumask_last(const struct cpumask *srcp)
>  {
> -	return 0;
> +	return cpumask_first(srcp);
>  }
>  
>  /* Valid inputs for n are -1 and 0. */
>  static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
>  {
> -	return n+1;
> +	return !!(n + 1 + cpumask_first(srcp));
>  }
>  
>  static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
>  {
> -	return n+1;
> +	return !!(n + 1 + cpumask_first_zero(srcp));
>  }
>  
>  static inline unsigned int cpumask_next_and(int n,
>  					    const struct cpumask *srcp,
>  					    const struct cpumask *andp)
>  {
> -	return n+1;
> +	return !!(n + 1 + cpumask_first_and(srcp, andp));
>  }
>  
>  static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask,
>  					     int start, bool wrap)
>  {
> -	/* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */
> -	return (wrap && n == 0);
> +	/*
> +	 * nr_cpumask_bits at stop condition, wrap and at cpu0, or empty mask
> +	 * otherwise cpu0
> +	 */
> +	return (wrap && n == 0) || cpumask_first(mask);
>  }
>  
>  /* cpu must be a valid cpu, ie 0, so there's no other choice. */
> @@ -186,14 +189,6 @@ static inline int cpumask_any_distribute(const struct cpumask *srcp)
>  	return cpumask_first(srcp);
>  }
>  
> -#define for_each_cpu(cpu, mask)			\
> -	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> -#define for_each_cpu_not(cpu, mask)		\
> -	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> -#define for_each_cpu_wrap(cpu, mask, start)	\
> -	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
> -#define for_each_cpu_and(cpu, mask1, mask2)	\
> -	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
>  #else
>  /**
>   * cpumask_first - get the first cpu in a cpumask
> @@ -259,11 +254,13 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
>  }
>  
>  int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
> +extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);

is this extern needed?

Thanks,
Yury

>  int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
>  unsigned int cpumask_local_spread(unsigned int i, int node);
>  int cpumask_any_and_distribute(const struct cpumask *src1p,
>  			       const struct cpumask *src2p);
>  int cpumask_any_distribute(const struct cpumask *srcp);
> +#endif /* SMP */
>  
>  /**
>   * for_each_cpu - iterate over every cpu in a mask
> @@ -289,7 +286,6 @@ int cpumask_any_distribute(const struct cpumask *srcp);
>  		(cpu) = cpumask_next_zero((cpu), (mask)),	\
>  		(cpu) < nr_cpu_ids;)
>  
> -extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
>  
>  /**
>   * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
> @@ -324,7 +320,6 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
>  	for ((cpu) = -1;						\
>  		(cpu) = cpumask_next_and((cpu), (mask1), (mask2)),	\
>  		(cpu) < nr_cpu_ids;)
> -#endif /* SMP */
>  
>  #define CPU_BITS_NONE						\
>  {								\
> -- 
> 2.36.1

  reply	other threads:[~2022-06-02 22:50 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-02 21:04 [PATCH v1 0/2] cpumask: Fix invalid uniprocessor assumptions Sander Vanheule
2022-06-02 21:04 ` [PATCH v1 1/2] cpumask: Fix invalid uniprocessor mask assumption Sander Vanheule
2022-06-02 22:48   ` Yury Norov [this message]
2022-06-03 15:13     ` Sander Vanheule
2022-06-02 21:04 ` [PATCH v1 2/2] cpumask: Add UP optimised for_each_*_cpu loops Sander Vanheule
  -- strict thread matches above, loose matches on Subject: below --
2022-06-03  5:35 [PATCH v1 1/2] cpumask: Fix invalid uniprocessor mask assumption kernel test robot
2022-06-04 12:02 kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Ypk+MKSSaSCBnITY@yury-laptop \
    --to=yury.norov@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andriy.shevchenko@linux.intel.com \
    --cc=elver@google.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=sander@svanheule.net \
    --cc=song.bao.hua@hisilicon.com \
    --cc=tglx@linutronix.de \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.