All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: use tzcnt instead of bsf
@ 2015-01-23 16:39 Jan Beulich
  2015-01-26  9:49 ` Andrew Cooper
  0 siblings, 1 reply; 2+ messages in thread
From: Jan Beulich @ 2015-01-23 16:39 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 1798 bytes --]

Following a compiler change done in 2012, make use of the fact that for
non-zero input BSF and TZCNT produce the same numeric result (EFLAGS
setting differs), and that CPUs not knowing of TZCNT will treat the
instruction as BSF (i.e. ignore what looks like a REP prefix to them).
The assumption here is that TZCNT would never have worse performance
than BSF.

Also extend the asm() input in find_first_set_bit() to allow memory
operands.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Thanks to Andrew for noticing that I forgot to post this for Xen after
a similar change got accepted into the Linux kernel.

--- a/xen/arch/x86/bitops.c
+++ b/xen/arch/x86/bitops.c
@@ -62,7 +62,7 @@ unsigned int __find_first_zero_bit(
         "   je 2f\n\t"
         "   xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t"
         "   jz 1b\n\t"
-        "   bsf %3,%0\n\t"
+        "   rep; bsf %3,%0\n\t"
         "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
         "2: sub %%ebx,%%edi\n\t"
         "   shl $3,%%edi\n\t"
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -56,7 +56,7 @@ static int vpic_get_priority(struct hvm_
         return VPIC_PRIO_NONE;
 
     /* prio = ffs(mask ROR vpic->priority_add); */
-    asm ( "ror %%cl,%b1 ; bsf %1,%0"
+    asm ( "ror %%cl,%b1 ; rep; bsf %1,%0"
           : "=r" (prio) : "q" ((uint32_t)mask), "c" (vpic->priority_add) );
     return prio;
 }
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -382,7 +382,7 @@ static inline unsigned int __scanbit(uns
  */
 static inline unsigned int find_first_set_bit(unsigned long word)
 {
-    asm ( "bsf %1,%0" : "=r" (word) : "r" (word) );
+    asm ( "rep; bsf %1,%0" : "=r" (word) : "rm" (word) );
     return (unsigned int)word;
 }
 




[-- Attachment #2: x86-use-tzcnt.patch --]
[-- Type: text/plain, Size: 1825 bytes --]

x86: use tzcnt instead of bsf

Following a compiler change done in 2012, make use of the fact that for
non-zero input BSF and TZCNT produce the same numeric result (EFLAGS
setting differs), and that CPUs not knowing of TZCNT will treat the
instruction as BSF (i.e. ignore what looks like a REP prefix to them).
The assumption here is that TZCNT would never have worse performance
than BSF.

Also extend the asm() input in find_first_set_bit() to allow memory
operands.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Thanks to Andrew for noticing that I forgot to post this for Xen after
a similar change got accepted into the Linux kernel.

--- a/xen/arch/x86/bitops.c
+++ b/xen/arch/x86/bitops.c
@@ -62,7 +62,7 @@ unsigned int __find_first_zero_bit(
         "   je 2f\n\t"
         "   xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t"
         "   jz 1b\n\t"
-        "   bsf %3,%0\n\t"
+        "   rep; bsf %3,%0\n\t"
         "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
         "2: sub %%ebx,%%edi\n\t"
         "   shl $3,%%edi\n\t"
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -56,7 +56,7 @@ static int vpic_get_priority(struct hvm_
         return VPIC_PRIO_NONE;
 
     /* prio = ffs(mask ROR vpic->priority_add); */
-    asm ( "ror %%cl,%b1 ; bsf %1,%0"
+    asm ( "ror %%cl,%b1 ; rep; bsf %1,%0"
           : "=r" (prio) : "q" ((uint32_t)mask), "c" (vpic->priority_add) );
     return prio;
 }
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -382,7 +382,7 @@ static inline unsigned int __scanbit(uns
  */
 static inline unsigned int find_first_set_bit(unsigned long word)
 {
-    asm ( "bsf %1,%0" : "=r" (word) : "r" (word) );
+    asm ( "rep; bsf %1,%0" : "=r" (word) : "rm" (word) );
     return (unsigned int)word;
 }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] x86: use tzcnt instead of bsf
  2015-01-23 16:39 [PATCH] x86: use tzcnt instead of bsf Jan Beulich
@ 2015-01-26  9:49 ` Andrew Cooper
  0 siblings, 0 replies; 2+ messages in thread
From: Andrew Cooper @ 2015-01-26  9:49 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 23/01/15 16:39, Jan Beulich wrote:
> Following a compiler change done in 2012, make use of the fact that for
> non-zero input BSF and TZCNT produce the same numeric result (EFLAGS
> setting differs), and that CPUs not knowing of TZCNT will treat the
> instruction as BSF (i.e. ignore what looks like a REP prefix to them).
> The assumption here is that TZCNT would never have worse performance
> than BSF.
>
> Also extend the asm() input in find_first_set_bit() to allow memory
> operands.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> Thanks to Andrew for noticing that I forgot to post this for Xen after
> a similar change got accepted into the Linux kernel.
>
> --- a/xen/arch/x86/bitops.c
> +++ b/xen/arch/x86/bitops.c
> @@ -62,7 +62,7 @@ unsigned int __find_first_zero_bit(
>          "   je 2f\n\t"
>          "   xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t"
>          "   jz 1b\n\t"
> -        "   bsf %3,%0\n\t"
> +        "   rep; bsf %3,%0\n\t"
>          "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
>          "2: sub %%ebx,%%edi\n\t"
>          "   shl $3,%%edi\n\t"
> --- a/xen/arch/x86/hvm/vpic.c
> +++ b/xen/arch/x86/hvm/vpic.c
> @@ -56,7 +56,7 @@ static int vpic_get_priority(struct hvm_
>          return VPIC_PRIO_NONE;
>  
>      /* prio = ffs(mask ROR vpic->priority_add); */
> -    asm ( "ror %%cl,%b1 ; bsf %1,%0"
> +    asm ( "ror %%cl,%b1 ; rep; bsf %1,%0"
>            : "=r" (prio) : "q" ((uint32_t)mask), "c" (vpic->priority_add) );
>      return prio;
>  }
> --- a/xen/include/asm-x86/bitops.h
> +++ b/xen/include/asm-x86/bitops.h
> @@ -382,7 +382,7 @@ static inline unsigned int __scanbit(uns
>   */
>  static inline unsigned int find_first_set_bit(unsigned long word)
>  {
> -    asm ( "bsf %1,%0" : "=r" (word) : "r" (word) );
> +    asm ( "rep; bsf %1,%0" : "=r" (word) : "rm" (word) );
>      return (unsigned int)word;
>  }
>  
>
>
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-01-26  9:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-23 16:39 [PATCH] x86: use tzcnt instead of bsf Jan Beulich
2015-01-26  9:49 ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.