From: Tim Deegan <tim@xen.org>
To: Ian Campbell <ian.campbell@citrix.com>
Cc: julien.grall@citrix.com, stefano.stabellini@eu.citrix.com,
xen-devel@lists.xen.org
Subject: Re: [PATCH] xen/arm64: Assembly optimized bitops from Linux
Date: Tue, 4 Jun 2013 18:20:00 +0100
Message-ID: <20130604172000.GB29202@ocelot.phlegethon.org>
In-Reply-To: <1370363007-29138-1-git-send-email-ian.campbell@citrix.com>

Hi,

At 17:23 +0100 on 04 Jun (1370366607), Ian Campbell wrote:
> This patch replaces the previous hashed lock implementation of bitops with
> assembly optimized ones taken from Linux v3.10-rc4.

I understand that you took this code from linux, but:

> +/*
> + * x0: bits 4:0 bit offset
> + * bits 31:5 word offset
> + * x1: address
> + */
> + .macro bitop, name, instr
> +ENTRY( \name )
> + and w3, w0, #31 // Get bit offset
> + eor w0, w0, w3 // Clear low bits
Not 'bic w0, w0, #31'? The EOR has a dependence on the previous
instruction. (Ditto for testop below).
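
For example (an untested sketch; I've written it as an AND with the
inverted mask in case the assembler won't take a BIC with an immediate
here):

    and     w3, w0, #31             // Get bit offset
    and     w0, w0, #0xffffffe0     // Clear low bits without reading w3

That way the second instruction only reads w0, so it can issue without
waiting for the AND that produces w3.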

Otherwise, Acked-by: Tim Deegan <tim@xen.org>

Cheers,

Tim.

> + mov x2, #1
> + add x1, x1, x0, lsr #2 // Get word offset
> + lsl x3, x2, x3 // Create mask
> +1: ldxr w2, [x1]
> + \instr w2, w2, w3
> + stxr w0, w2, [x1]
> + cbnz w0, 1b
> + ret
> +ENDPROC(\name )
> + .endm
> +
> + .macro testop, name, instr
> +ENTRY( \name )
> + and w3, w0, #31 // Get bit offset
> + eor w0, w0, w3 // Clear low bits
> + mov x2, #1
> + add x1, x1, x0, lsr #2 // Get word offset
> + lsl x4, x2, x3 // Create mask
> +1: ldaxr w2, [x1]
> + lsr w0, w2, w3 // Save old value of bit
> + \instr w2, w2, w4 // toggle bit
> + stlxr w5, w2, [x1]
> + cbnz w5, 1b
> + and w0, w0, #1
> +3: ret
> +ENDPROC(\name )
> + .endm
> +
> +/*
> + * Atomic bit operations.
> + */
> + bitop change_bit, eor
> + bitop clear_bit, bic
> + bitop set_bit, orr
> +
> + testop test_and_change_bit, eor
> + testop test_and_clear_bit, bic
> + testop test_and_set_bit, orr
> diff --git a/xen/arch/arm/arm64/lib/bitops.c b/xen/arch/arm/arm64/lib/bitops.c
> deleted file mode 100644
> index 02d8d78..0000000
> --- a/xen/arch/arm/arm64/lib/bitops.c
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/*
> - * Copyright (C) 2012 ARM Limited
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> - * GNU General Public License for more details.
> - *
> - * You should have received a copy of the GNU General Public License
> - * along with this program. If not, see <http://www.gnu.org/licenses/>.
> - */
> -
> -#include <xen/spinlock.h>
> -#include <xen/bitops.h>
> -
> -spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] /*__lock_aligned*/ = {
> - [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
> -};
> diff --git a/xen/include/asm-arm/arm64/bitops.h b/xen/include/asm-arm/arm64/bitops.h
> index 0a6eba3..b43931d 100644
> --- a/xen/include/asm-arm/arm64/bitops.h
> +++ b/xen/include/asm-arm/arm64/bitops.h
> @@ -1,200 +1,15 @@
> #ifndef _ARM_ARM64_BITOPS_H
> #define _ARM_ARM64_BITOPS_H
>
> -/* Generic bitop support. Based on linux/include/asm-generic/bitops/atomic.h */
> -
> -#include <xen/spinlock.h>
> -#include <xen/cache.h> /* we use L1_CACHE_BYTES */
> -
> -/* Use an array of spinlocks for our atomic_ts.
> - * Hash function to index into a different SPINLOCK.
> - * Since "a" is usually an address, use one spinlock per cacheline.
> - */
> -# define ATOMIC_HASH_SIZE 4
> -# define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
> -
> -extern spinlock_t __atomic_hash[ATOMIC_HASH_SIZE]/* __lock_aligned*/;
> -
> -#define _atomic_spin_lock_irqsave(l,f) do { \
> - spinlock_t *s = ATOMIC_HASH(l); \
> - spin_lock_irqsave(s, f);\
> -} while(0)
> -
> -#define _atomic_spin_unlock_irqrestore(l,f) do {\
> - spinlock_t *s = ATOMIC_HASH(l); \
> - spin_unlock_irqrestore(s,f); \
> -} while(0)
> -
> -#define FIXUP(_p, _mask) \
> - { \
> - unsigned long __p = (unsigned long)_p; \
> - if (__p & 0x7) { \
> - if (_mask > 0xffffffff) { \
> - __p = (__p+32)&~0x7; _mask >>=32; \
> - } else { \
> - __p &= ~0x7; _mask <<= 32; \
> - } \
> - if (0)printk("BITOPS: Fixup misaligned ptr %p => %#lx\n", _p, __p); \
> - _p = (void *)__p; \
> - } \
> - }
> -
> -/**
> - * set_bit - Atomically set a bit in memory
> - * @nr: the bit to set
> - * @addr: the address to start counting from
> - *
> - * This function is atomic and may not be reordered. See __set_bit()
> - * if you do not require the atomic guarantees.
> - *
> - * Note: there are no guarantees that this function will not be reordered
> - * on non x86 architectures, so if you are writing portable code,
> - * make sure not to rely on its reordering guarantees.
> - *
> - * Note that @nr may be almost arbitrarily large; this function is not
> - * restricted to acting on a single-word quantity.
> - */
> -
> -static inline void set_bit(int nr, volatile void *addr)
> -{
> - unsigned long mask = BIT_MASK(nr);
> - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> - unsigned long flags;
> -
> - //printk("set_bit: nr %d addr %p mask %#lx p %p lock %p\n",
> - // nr, addr, mask, p, ATOMIC_HASH(p));
> - FIXUP(p, mask);
> - //printk("set_bit: nr %d addr %p mask %#lx p %p lock %p\n",
> - // nr, addr, mask, p, ATOMIC_HASH(p));
> - //printk("before *p is %#lx\n", *p);
> - _atomic_spin_lock_irqsave(p, flags);
> - *p |= mask;
> - _atomic_spin_unlock_irqrestore(p, flags);
> - //printk(" after *p is %#lx\n", *p);
> -}
> -
> -/**
> - * clear_bit - Clears a bit in memory
> - * @nr: Bit to clear
> - * @addr: Address to start counting from
> - *
> - * clear_bit() is atomic and may not be reordered. However, it does
> - * not contain a memory barrier, so if it is used for locking purposes,
> - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
> - * in order to ensure changes are visible on other processors.
> - */
> -static inline void clear_bit(int nr, volatile void *addr)
> -{
> - unsigned long mask = BIT_MASK(nr);
> - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> - unsigned long flags;
> -
> - FIXUP(p, mask);
> -
> - _atomic_spin_lock_irqsave(p, flags);
> - *p &= ~mask;
> - _atomic_spin_unlock_irqrestore(p, flags);
> -}
> -
> -/**
> - * change_bit - Toggle a bit in memory
> - * @nr: Bit to change
> - * @addr: Address to start counting from
> - *
> - * change_bit() is atomic and may not be reordered. It may be
> - * reordered on other architectures than x86.
> - * Note that @nr may be almost arbitrarily large; this function is not
> - * restricted to acting on a single-word quantity.
> - */
> -static inline void change_bit(int nr, volatile void *addr)
> -{
> - unsigned long mask = BIT_MASK(nr);
> - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> - unsigned long flags;
> -
> - FIXUP(p, mask);
> -
> - _atomic_spin_lock_irqsave(p, flags);
> - *p ^= mask;
> - _atomic_spin_unlock_irqrestore(p, flags);
> -}
> -
> -/**
> - * test_and_set_bit - Set a bit and return its old value
> - * @nr: Bit to set
> - * @addr: Address to count from
> - *
> - * This operation is atomic and cannot be reordered.
> - * It may be reordered on other architectures than x86.
> - * It also implies a memory barrier.
> - */
> -static inline int test_and_set_bit(int nr, volatile void *addr)
> -{
> - unsigned long mask = BIT_MASK(nr);
> - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> - unsigned long old;
> - unsigned long flags;
> -
> - FIXUP(p, mask);
> -
> - _atomic_spin_lock_irqsave(p, flags);
> - old = *p;
> - *p = old | mask;
> - _atomic_spin_unlock_irqrestore(p, flags);
> -
> - return (old & mask) != 0;
> -}
> -
> -/**
> - * test_and_clear_bit - Clear a bit and return its old value
> - * @nr: Bit to clear
> - * @addr: Address to count from
> - *
> - * This operation is atomic and cannot be reordered.
> - * It can be reorderdered on other architectures other than x86.
> - * It also implies a memory barrier.
> - */
> -static inline int test_and_clear_bit(int nr, volatile void *addr)
> -{
> - unsigned long mask = BIT_MASK(nr);
> - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> - unsigned long old;
> - unsigned long flags;
> -
> - FIXUP(p, mask);
> -
> - _atomic_spin_lock_irqsave(p, flags);
> - old = *p;
> - *p = old & ~mask;
> - _atomic_spin_unlock_irqrestore(p, flags);
> -
> - return (old & mask) != 0;
> -}
> -
> -/**
> - * test_and_change_bit - Change a bit and return its old value
> - * @nr: Bit to change
> - * @addr: Address to count from
> - *
> - * This operation is atomic and cannot be reordered.
> - * It also implies a memory barrier.
> +/*
> + * Little endian assembly atomic bitops.
> */
> -static inline int test_and_change_bit(int nr, volatile void *addr)
> -{
> - unsigned long mask = BIT_MASK(nr);
> - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> - unsigned long old;
> - unsigned long flags;
> -
> - FIXUP(p, mask);
> -
> - _atomic_spin_lock_irqsave(p, flags);
> - old = *p;
> - *p = old ^ mask;
> - _atomic_spin_unlock_irqrestore(p, flags);
> -
> - return (old & mask) != 0;
> -}
> +extern void set_bit(int nr, volatile void *p);
> +extern void clear_bit(int nr, volatile void *p);
> +extern void change_bit(int nr, volatile void *p);
> +extern int test_and_set_bit(int nr, volatile void *p);
> +extern int test_and_clear_bit(int nr, volatile void *p);
> +extern int test_and_change_bit(int nr, volatile void *p);
>
> /* Based on linux/include/asm-generic/bitops/builtin-__ffs.h */
> /**
> @@ -217,8 +32,6 @@ static /*__*/always_inline unsigned long __ffs(unsigned long word)
> */
> #define ffz(x) __ffs(~(x))
>
> -
> -
> /* Based on linux/include/asm-generic/bitops/find.h */
>
> #ifndef find_next_bit
> --
> 1.7.2.5
>